From 926259c411c1022812ffb7fe88ca61f0180bd778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 09:51:09 +0800 Subject: [PATCH 001/434] TST: test case for string --- tensorflow/python/kernel_tests/scatter_nd_ops_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 9f57949515..83d69c651a 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -364,6 +364,16 @@ class ScatterNdTest(test.TestCase): del input_ # input_ is not used in scatter_nd return array_ops.scatter_nd(indices, updates, shape) + def testString(self): + indices = constant_op.constant([[4], [3], [1], [7]], dtype=dtypes.int32) + updates = constant_op.constant(["four", "three", "one", "seven"], dtype=dtypes.string) + expected = np.array(["", "one", "", "three", "four", "", "", "seven"]) + scatter = self.scatter_nd(indices, updates, shape=(8,)) + + with self.test_session() as sess: + result = sess.run(scatter) + self.assertTrue(np.array_equal(result, expected)) + def testRank3ValidShape(self): indices = array_ops.zeros([2, 2, 2], dtypes.int32) updates = array_ops.zeros([2, 2, 2], dtypes.int32) -- GitLab From 005840c6e2d2a4c25ecd293162a38a79dedf1a4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 10:06:44 +0800 Subject: [PATCH 002/434] ENH: supports string for cpu --- tensorflow/core/kernels/scatter_nd_op.cc | 1 + tensorflow/core/kernels/scatter_nd_op_cpu_impl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 3a95dd1773..0caa7bd317 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -241,6 +241,7 @@ class ScatterNdUpdateOp : public 
OpKernel { TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_ADD_SUB_CPU); TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_UPDATE_CPU); TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_CPU); +TF_CALL_string(REGISTER_SCATTER_ND_CPU); // Registers GPU kernels. #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h index cffc326174..155d354d85 100644 --- a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h +++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h @@ -160,6 +160,7 @@ struct ScatterNdFunctor { REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB); TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE); +REGISTER_SCATTER_ND_INDEX(string, scatter_nd_op::UpdateOp::ADD); TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_MATH) #undef REGISTER_SCATTER_ND_MATH -- GitLab From d887d2bcfc819034b17e812a9a60460e2d61e447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 12:14:40 +0800 Subject: [PATCH 003/434] TST: ignore NonAliasingAdd --- tensorflow/python/kernel_tests/scatter_nd_ops_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 83d69c651a..03b2f892c6 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -594,6 +594,10 @@ class ScatterNdNonAliasingAddTest(ScatterNdTest): shape, dtype=updates.dtype)) return array_ops.scatter_nd_non_aliasing_add(input_, indices, updates) + def testString(self): + # Not supported yet. 
+ pass + if __name__ == "__main__": test.main() -- GitLab From 4b697e0d9472215c706bdb36bb72986cdce78edd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 13:51:34 +0800 Subject: [PATCH 004/434] DOC: modify document --- tensorflow/core/ops/array_ops.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 5a31f433ce..933ebe6b63 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -5332,12 +5332,13 @@ REGISTER_OP("ScatterNd") .Attr("Tindices: {int32, int64}") .SetShapeFn(ScatterNdShape) .Doc(R"doc( -Scatter `updates` into a new (initially zero) tensor according to `indices`. +Scatter `updates` into a new (initially zero for numeric, empty for string) +tensor according to `indices`. -Creates a new tensor by applying sparse `updates` to individual -values or slices within a zero tensor of the given `shape` according to -indices. This operator is the inverse of the @{tf.gather_nd} operator which -extracts values or slices from a given tensor. +Creates a new tensor by applying sparse `updates` to individual values or +slices within a zero (or empty string) tensor of the given `shape` +according to indices. This operator is the inverse of the @{tf.gather_nd} +operator which extracts values or slices from a given tensor. **WARNING**: The order in which updates are applied is nondeterministic, so the output will be nondeterministic if `indices` contains duplicates. 
-- GitLab From 597403e03680d69b72dbfa669f7bbdc77ce21ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 20 Dec 2017 16:34:48 +0800 Subject: [PATCH 005/434] CLN: conform docstring --- tensorflow/core/ops/array_ops.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 933ebe6b63..89b6eb7162 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -5332,13 +5332,12 @@ REGISTER_OP("ScatterNd") .Attr("Tindices: {int32, int64}") .SetShapeFn(ScatterNdShape) .Doc(R"doc( -Scatter `updates` into a new (initially zero for numeric, empty for string) -tensor according to `indices`. +Scatter `updates` into a new empty tensor according to `indices`. Creates a new tensor by applying sparse `updates` to individual values or -slices within a zero (or empty string) tensor of the given `shape` -according to indices. This operator is the inverse of the @{tf.gather_nd} -operator which extracts values or slices from a given tensor. +slices within a tensor (initially zero for numeric, empty for string) of +the given `shape` according to indices. This operator is the inverse of the +@{tf.gather_nd} operator which extracts values or slices from a given tensor. **WARNING**: The order in which updates are applied is nondeterministic, so the output will be nondeterministic if `indices` contains duplicates. 
-- GitLab From 9c272adf248228408448db6219b238145f5a02ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Fri, 16 Feb 2018 10:38:50 +0800 Subject: [PATCH 006/434] DOC: move doc to api def file --- .../core/api_def/base_api/api_def_ScatterNd.pbtxt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt index 4cb8c064fc..4e95895f54 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt @@ -25,12 +25,12 @@ A new tensor with the given shape and updates applied according to the indices. END } - summary: "Scatter `updates` into a new (initially zero) tensor according to `indices`." + summary: "Scatter `updates` into a new empty tensor according to `indices`." description: < Date: Wed, 7 Mar 2018 08:11:03 -0800 Subject: [PATCH 007/434] C++ gradient for StridedSlice See https://github.com/tensorflow/tensorflow/issues/9645 --- tensorflow/cc/gradients/array_grad.cc | 36 ++++++++++++++++++++++ tensorflow/cc/gradients/array_grad_test.cc | 24 +++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc index 6545e4ee3e..ff348fadb2 100644 --- a/tensorflow/cc/gradients/array_grad.cc +++ b/tensorflow/cc/gradients/array_grad.cc @@ -385,6 +385,42 @@ Status MirrorPadGradGrad(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("MirrorPadGrad", MirrorPadGradGrad); +Status StridedSliceGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + Input x = Shape(scope, op.input(0)); + Input begin = op.input(1); + Input end = op.input(2); + Input strides = op.input(3); + int64 begin_mask; + int64 end_mask; + int64 ellipsis_mask; + int64 new_axis_mask; + int64 shrink_axis_mask; + 
TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "begin_mask", &begin_mask)); + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "end_mask", &end_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "ellipsis_mask", &ellipsis_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "new_axis_mask", &new_axis_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "shrink_axis_mask", &shrink_axis_mask)); + grad_outputs->push_back( + StridedSliceGrad(scope, x, begin, end, strides, grad_inputs[0], + StridedSliceGrad::BeginMask(begin_mask) + .EndMask(end_mask) + .EllipsisMask(ellipsis_mask) + .NewAxisMask(new_axis_mask) + .ShrinkAxisMask(shrink_axis_mask))); + // No gradients returned for begin, end and strides + grad_outputs->push_back(NoGradient()); + grad_outputs->push_back(NoGradient()); + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper); + } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc index 4a215fcc92..2a2180297c 100644 --- a/tensorflow/cc/gradients/array_grad_test.cc +++ b/tensorflow/cc/gradients/array_grad_test.cc @@ -354,5 +354,29 @@ TEST_F(ArrayGradTest, MirrorPadGradGrad_Symmetric) { RunTest(x, x_shape, y, y_shape); } +TEST_F(ArrayGradTest, StridedSliceGrad) { + TensorShape x_shape({6, 4, 4}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + + // y = x[2:6:2, 1:3, 1:3] + auto y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}); + // y.shape = [2, 2, 2]; + RunTest(x, x_shape, y, {2, 2, 2}); + + // y = x[2:6:2, 1:3, 1:3] + // begin_mask = 1<<1 (ignore begin_index = 1) + // end_mask = 1<<2 (ignore end_index = 2) + y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}, + StridedSlice::BeginMask(1<<1).EndMask(1<<2)); + // y.shape = [2, 3, 3]; + RunTest(x, x_shape, y, {2, 3, 3}); + + // y = 
[tf.newaxis, 2:6:2, 1:3, 1:3] + y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1}, + StridedSlice::NewAxisMask(1<<0)); + // y.shape = [1, 2, 2, 2]; + RunTest(x, x_shape, y, {1, 2, 2, 2}); +} + } // namespace } // namespace tensorflow -- GitLab From e31fb25f4e3989a846a8e54d789a3bf5efff0cea Mon Sep 17 00:00:00 2001 From: KB Sriram Date: Thu, 8 Mar 2018 07:40:24 -0800 Subject: [PATCH 008/434] Clang-format fixes. --- tensorflow/cc/gradients/array_grad_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc index 2a2180297c..de3bd0fc9e 100644 --- a/tensorflow/cc/gradients/array_grad_test.cc +++ b/tensorflow/cc/gradients/array_grad_test.cc @@ -367,13 +367,13 @@ TEST_F(ArrayGradTest, StridedSliceGrad) { // begin_mask = 1<<1 (ignore begin_index = 1) // end_mask = 1<<2 (ignore end_index = 2) y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}, - StridedSlice::BeginMask(1<<1).EndMask(1<<2)); + StridedSlice::BeginMask(1 << 1).EndMask(1 << 2)); // y.shape = [2, 3, 3]; RunTest(x, x_shape, y, {2, 3, 3}); // y = [tf.newaxis, 2:6:2, 1:3, 1:3] y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1}, - StridedSlice::NewAxisMask(1<<0)); + StridedSlice::NewAxisMask(1 << 0)); // y.shape = [1, 2, 2, 2]; RunTest(x, x_shape, y, {1, 2, 2, 2}); } -- GitLab From fe46c22a80b068b2b30f1e44f2f950ba6b6e907b Mon Sep 17 00:00:00 2001 From: Joe Yearsley Date: Fri, 9 Mar 2018 22:41:37 +0000 Subject: [PATCH 009/434] Update fold_old_batch_norms.cc Fixes the problem of using fused batch normalization and this transform, only shows up when using 'NCHW' as the default is 'NHWC'. 
--- tensorflow/tools/graph_transforms/fold_old_batch_norms.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index d86f65325b..a5acd53ad6 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -159,6 +159,7 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, NodeDef bias_add_node; bias_add_node.set_op("BiasAdd"); bias_add_node.set_name(conv_output_name); + bias_add_op.attr["data_format"].CopyFrom(conv_node.attr["data_format"]) CopyNodeAttr(conv_node, "T", "T", &bias_add_node); AddNodeInput(conv_node.name(), &bias_add_node); AddNodeInput(bias_offset_node.name(), &bias_add_node); -- GitLab From 1ad788b136d509888cf7d484f762e31b2ee37a50 Mon Sep 17 00:00:00 2001 From: Joe Yearsley Date: Fri, 9 Mar 2018 22:46:30 +0000 Subject: [PATCH 010/434] Update fold_old_batch_norms.cc --- tensorflow/tools/graph_transforms/fold_old_batch_norms.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index a5acd53ad6..3376a81312 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -159,7 +159,7 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, NodeDef bias_add_node; bias_add_node.set_op("BiasAdd"); bias_add_node.set_name(conv_output_name); - bias_add_op.attr["data_format"].CopyFrom(conv_node.attr["data_format"]) + bias_add_node.attr["data_format"].CopyFrom(conv_node.attr["data_format"]) CopyNodeAttr(conv_node, "T", "T", &bias_add_node); AddNodeInput(conv_node.name(), &bias_add_node); AddNodeInput(bias_offset_node.name(), &bias_add_node); -- GitLab From d0680917907671f5870818d21ee0ff77bf7c3ff6 Mon Sep 17 00:00:00 2001 From: Joe 
Yearsley Date: Fri, 9 Mar 2018 23:56:52 +0000 Subject: [PATCH 011/434] Update fold_old_batch_norms.cc --- tensorflow/tools/graph_transforms/fold_old_batch_norms.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index 3376a81312..59f3ffdcda 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -159,7 +159,7 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, NodeDef bias_add_node; bias_add_node.set_op("BiasAdd"); bias_add_node.set_name(conv_output_name); - bias_add_node.attr["data_format"].CopyFrom(conv_node.attr["data_format"]) + CopyNodeAttr(conv_node, "data_format", "data_format", &bias_add_node); CopyNodeAttr(conv_node, "T", "T", &bias_add_node); AddNodeInput(conv_node.name(), &bias_add_node); AddNodeInput(bias_offset_node.name(), &bias_add_node); -- GitLab From 0c6845db28bd690eb848dde837f23fef6a0a8eed Mon Sep 17 00:00:00 2001 From: josephyearsley Date: Sat, 31 Mar 2018 17:40:40 +0100 Subject: [PATCH 012/434] Copy data_format if the original node has that attr. 
--- tensorflow/tools/graph_transforms/fold_old_batch_norms.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index 59f3ffdcda..988ba25e36 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -159,7 +159,9 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, NodeDef bias_add_node; bias_add_node.set_op("BiasAdd"); bias_add_node.set_name(conv_output_name); - CopyNodeAttr(conv_node, "data_format", "data_format", &bias_add_node); + if (HasAttr(conv_node, "data_format")) { + CopyNodeAttr(conv_node, "data_format", "data_format", &bias_add_node); + } CopyNodeAttr(conv_node, "T", "T", &bias_add_node); AddNodeInput(conv_node.name(), &bias_add_node); AddNodeInput(bias_offset_node.name(), &bias_add_node); -- GitLab From 0b9eedd684b4085ab65d60627efa8594a92a0b98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 7 Apr 2018 11:47:03 +0800 Subject: [PATCH 013/434] TST: add test case for duplicate indices --- .../kernel_tests/scatter_nd_ops_test.py | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 03b2f892c6..dfe9600dbb 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -366,13 +366,35 @@ class ScatterNdTest(test.TestCase): def testString(self): indices = constant_op.constant([[4], [3], [1], [7]], dtype=dtypes.int32) - updates = constant_op.constant(["four", "three", "one", "seven"], dtype=dtypes.string) + updates = constant_op.constant(["four", "three", "one", "seven"], + dtype=dtypes.string) expected = np.array(["", "one", "", "three", "four", "", "", "seven"]) 
scatter = self.scatter_nd(indices, updates, shape=(8,)) + with self.test_session() as sess: + result = sess.run(scatter) + self.assertAllEqual(expected, result) + # Same indice is updated twice by same value. + indices = constant_op.constant([[4], [3], [3], [7]], dtype=dtypes.int32) + updates = constant_op.constant(["a", "b", "b", "c"], + dtype=dtypes.string) + expected = np.array(["", "", "", "bb", "a", "", "", "c"]) + scatter = self.scatter_nd(indices, updates, shape=(8,)) + with self.test_session() as sess: + result = sess.run(scatter) + self.assertAllEqual(expected, result) + + # Same indice is updated twice by different value. + indices = constant_op.constant([[4], [3], [3], [7]], dtype=dtypes.int32) + updates = constant_op.constant(["a", "b", "c", "d"], + dtype=dtypes.string) + expected = [np.array(["", "", "", "bc", "a", "", "", "d"]), + np.array(["", "", "", "cb", "a", "", "", "d"])] + scatter = self.scatter_nd(indices, updates, shape=(8,)) with self.test_session() as sess: result = sess.run(scatter) - self.assertTrue(np.array_equal(result, expected)) + self.assertTrue(np.array_equal(result, expected[0]) or + np.array_equal(result, expected[1])) def testRank3ValidShape(self): indices = array_ops.zeros([2, 2, 2], dtypes.int32) -- GitLab From 9e1bbbc0fb770f077d9de295b53181e3592f1d24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 7 Apr 2018 12:07:11 +0800 Subject: [PATCH 014/434] DOC: remove the misleading 'empty tensor' --- tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt index 4e95895f54..58753a651a 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt @@ -25,7 +25,7 @@ A new tensor with the given shape and updates applied according to the 
indices. END } - summary: "Scatter `updates` into a new empty tensor according to `indices`." + summary: "Scatter `updates` into a new tensor according to `indices`." description: < Date: Fri, 13 Apr 2018 10:19:24 -0700 Subject: [PATCH 015/434] Cherry-picking PR #18444 into r1.8 --- tensorflow/contrib/tensorrt/BUILD | 2 +- .../contrib/tensorrt/resources/trt_resource_manager.cc | 6 ++++++ .../contrib/tensorrt/resources/trt_resource_manager.h | 6 +----- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 2f316767b3..fd3582e175 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -52,7 +52,6 @@ tf_custom_op_library( "ops/trt_engine_op.cc", ], deps = [ - ":trt_engine_op_kernel", ":trt_shape_function", "//tensorflow/core:lib_proto_parsing", ] + if_tensorrt([ @@ -183,6 +182,7 @@ tf_py_wrap_cc( copts = tf_copts(), deps = [ ":trt_conversion", + ":trt_engine_op_kernel", "//tensorflow/core:framework_lite", "//util/python:python_headers", ], diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc index e663eed4dd..9c3698e5d1 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc @@ -19,6 +19,12 @@ limitations under the License. namespace tensorflow { namespace tensorrt { +std::shared_ptr +tensorflow::tensorrt::TRTResourceManager::instance() { + static std::shared_ptr instance_(new TRTResourceManager); + return instance_; +} + std::shared_ptr tensorflow::tensorrt::TRTResourceManager::getManager(const string& op_name) { // mutex is held for lookup only. 
Most instantiations where mutex will be held diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h index 5f8ad491d3..bc15b51e05 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h @@ -29,11 +29,7 @@ class TRTResourceManager { TRTResourceManager() = default; public: - static std::shared_ptr instance() { - static std::shared_ptr instance_( - new TRTResourceManager); - return instance_; - } + static std::shared_ptr instance(); // returns a manager for given op, if it doesn't exists it creates one std::shared_ptr getManager(const string& op_name); -- GitLab From 76a73f899cdc5e19ef2b99373524dcb4dba0bd2b Mon Sep 17 00:00:00 2001 From: Younghee Kwon Date: Mon, 9 Apr 2018 17:45:13 -0700 Subject: [PATCH 016/434] boosted_trees: early stop hooks are fixed to stop at the right moment by reading tensor values in a separate session after train_op run. 
PiperOrigin-RevId: 192217338 --- .../python/estimator/boosted_trees_test.py | 97 +++++++------------ .../python/estimator/canned/boosted_trees.py | 33 +++---- .../estimator/canned/boosted_trees_test.py | 63 +++++------- 3 files changed, 71 insertions(+), 122 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py index e99a87f3b3..eee5910687 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.estimator.python.estimator import boosted_trees +from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.feature_column import feature_column @@ -69,10 +70,18 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): for i in range(NUM_FEATURES) } - def _assert_checkpoint(self, model_dir, expected_global_step): - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(model_dir, - ops.GraphKeys.GLOBAL_STEP)) + def _assert_checkpoint(self, model_dir, global_step, finalized_trees, + attempted_layers): + reader = checkpoint_utils.load_checkpoint(model_dir) + self.assertEqual(global_step, reader.get_tensor(ops.GraphKeys.GLOBAL_STEP)) + serialized = reader.get_tensor('boosted_trees:0_serialized') + ensemble_proto = boosted_trees_pb2.TreeEnsemble() + ensemble_proto.ParseFromString(serialized) + self.assertEqual( + finalized_trees, + sum([1 for t in ensemble_proto.tree_metadata if t.is_finalized])) + self.assertEqual(attempted_layers, + ensemble_proto.growing_metadata.num_layers_attempted) def testTrainAndEvaluateEstimator(self): input_fn = 
_make_train_input_fn(is_classification=False) @@ -88,9 +97,10 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. est.train(input_fn, steps=num_steps) - self._assert_checkpoint(est.model_dir, 11) + self._assert_checkpoint( + est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10) eval_res = est.evaluate(input_fn=input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 0.913176) + self.assertAllClose(eval_res['average_loss'], 1.008551) def testInferEstimator(self): train_input_fn = _make_train_input_fn(is_classification=False) @@ -108,31 +118,13 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. est.train(train_input_fn, steps=num_steps) - self._assert_checkpoint(est.model_dir, 6) - + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + # Validate predictions. 
predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertEquals(5, len(predictions)) - self.assertAllClose([0.703549], predictions[0]['predictions']) - self.assertAllClose([0.266539], predictions[1]['predictions']) - self.assertAllClose([0.256479], predictions[2]['predictions']) - self.assertAllClose([1.088732], predictions[3]['predictions']) - self.assertAllClose([1.901732], predictions[4]['predictions']) - - -class BoostedTreesClassifierTrainInMemoryTest(test_util.TensorFlowTestCase): - - def setUp(self): - self._feature_columns = { - feature_column.bucketized_column( - feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), - BUCKET_BOUNDARIES) - for i in range(NUM_FEATURES) - } - - def _assert_checkpoint(self, model_dir, expected_global_step): - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(model_dir, - ops.GraphKeys.GLOBAL_STEP)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) def testBinaryClassifierTrainInMemoryAndEvalAndInfer(self): train_input_fn = _make_train_input_fn(is_classification=True) @@ -145,36 +137,16 @@ class BoostedTreesClassifierTrainInMemoryTest(test_util.TensorFlowTestCase): n_trees=1, max_depth=5) # It will stop after 5 steps because of the max depth and num trees. - self._assert_checkpoint(est.model_dir, 6) + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) # Check eval. eval_res = est.evaluate(input_fn=train_input_fn, steps=1) self.assertAllClose(eval_res['accuracy'], 1.0) - - # Check predict that all labels are correct. + # Validate predictions. 
predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertEquals(5, len(predictions)) - self.assertAllClose([0], predictions[0]['class_ids']) - self.assertAllClose([1], predictions[1]['class_ids']) - self.assertAllClose([1], predictions[2]['class_ids']) - self.assertAllClose([0], predictions[3]['class_ids']) - self.assertAllClose([0], predictions[4]['class_ids']) - - -class BoostedTreesRegressorTrainInMemoryTest(test_util.TensorFlowTestCase): - - def setUp(self): - self._feature_columns = { - feature_column.bucketized_column( - feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), - BUCKET_BOUNDARIES) - for i in range(NUM_FEATURES) - } - - def _assert_checkpoint(self, model_dir, expected_global_step): - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(model_dir, - ops.GraphKeys.GLOBAL_STEP)) + self.assertAllClose([[0], [1], [1], [0], [0]], + [pred['class_ids'] for pred in predictions]) def testRegressorTrainInMemoryAndEvalAndInfer(self): train_input_fn = _make_train_input_fn(is_classification=False) @@ -187,20 +159,17 @@ class BoostedTreesRegressorTrainInMemoryTest(test_util.TensorFlowTestCase): n_trees=1, max_depth=5) # It will stop after 5 steps because of the max depth and num trees. - self._assert_checkpoint(est.model_dir, 6) + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) # Check eval. eval_res = est.evaluate(input_fn=train_input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 2.2136638) - + self.assertAllClose(eval_res['average_loss'], 2.478283) # Validate predictions. 
predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertEquals(5, len(predictions)) - self.assertAllClose([0.703549], predictions[0]['predictions']) - self.assertAllClose([0.266539], predictions[1]['predictions']) - self.assertAllClose([0.256479], predictions[2]['predictions']) - self.assertAllClose([1.088732], predictions[3]['predictions']) - self.assertAllClose([1.901732], predictions[4]['predictions']) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) if __name__ == '__main__': diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 500ea03ea7..c5d5455b1a 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -209,8 +209,8 @@ class _CacheTrainingStatesUsingVariables(object): name='cache_insert') -class StopAtAttemptsHook(session_run_hook.SessionRunHook): - """Hook that requests stop at the number of trees.""" +class _StopAtAttemptsHook(session_run_hook.SessionRunHook): + """Hook that requests stop at the number of attempts.""" def __init__(self, num_finalized_trees_tensor, num_attempted_layers_tensor, max_trees, max_depth): @@ -224,25 +224,17 @@ class StopAtAttemptsHook(session_run_hook.SessionRunHook): [self._num_finalized_trees_tensor, self._num_attempted_layers_tensor]) def after_run(self, run_context, run_values): + # num_* tensors should be retrieved by a separate session than the training + # one, in order to read the values after growing. + # So, if it's approaching to the limit, get the actual value by additional + # session. 
num_finalized_trees, num_attempted_layers = run_values.results + if (num_finalized_trees >= self._max_trees - 1 or + num_attempted_layers > 2 * self._max_trees * self._max_depth - 1): + num_finalized_trees, num_attempted_layers = run_context.session.run( + [self._num_finalized_trees_tensor, self._num_attempted_layers_tensor]) if (num_finalized_trees >= self._max_trees or - 1.0 * num_attempted_layers / self._max_depth > 2 * self._max_trees): - run_context.request_stop() - - -class StopAtNumTreesHook(session_run_hook.SessionRunHook): - """Hook that requests stop at the number of trees.""" - - def __init__(self, num_trees_tensor, max_trees): - self._num_trees_tensor = num_trees_tensor - self._max_trees = max_trees - - def before_run(self, run_context): - return session_run_hook.SessionRunArgs(self._num_trees_tensor) - - def after_run(self, run_context, run_values): - num_trees = run_values.results - if num_trees > self._max_trees: + num_attempted_layers > 2 * self._max_trees * self._max_depth): run_context.request_stop() @@ -468,7 +460,8 @@ def _bt_model_fn( # Add an early stop hook. 
estimator_spec = estimator_spec._replace( training_hooks=estimator_spec.training_hooks + - (StopAtNumTreesHook(num_trees, tree_hparams.n_trees),)) + (_StopAtAttemptsHook(num_finalized_trees, num_attempted_layers, + tree_hparams.n_trees, tree_hparams.max_depth),)) return estimator_spec diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 01e5cc7a5d..625745a3f9 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -69,7 +69,7 @@ def _make_train_input_fn(is_classification): return _input_fn -class BoostedTreesClassifierTest(test_util.TensorFlowTestCase): +class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): def setUp(self): self._feature_columns = { @@ -79,10 +79,18 @@ class BoostedTreesClassifierTest(test_util.TensorFlowTestCase): for i in range(NUM_FEATURES) } - def _assert_checkpoint(self, model_dir, expected_global_step): - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(model_dir, - ops.GraphKeys.GLOBAL_STEP)) + def _assert_checkpoint(self, model_dir, global_step, finalized_trees, + attempted_layers): + reader = checkpoint_utils.load_checkpoint(model_dir) + self.assertEqual(global_step, reader.get_tensor(ops.GraphKeys.GLOBAL_STEP)) + serialized = reader.get_tensor('boosted_trees:0_serialized') + ensemble_proto = boosted_trees_pb2.TreeEnsemble() + ensemble_proto.ParseFromString(serialized) + self.assertEqual( + finalized_trees, + sum([1 for t in ensemble_proto.tree_metadata if t.is_finalized])) + self.assertEqual(attempted_layers, + ensemble_proto.growing_metadata.num_layers_attempted) def testTrainAndEvaluateBinaryClassifier(self): input_fn = _make_train_input_fn(is_classification=True) @@ -97,7 +105,8 @@ class BoostedTreesClassifierTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. 
est.train(input_fn, steps=num_steps) - self._assert_checkpoint(est.model_dir, 6) + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) eval_res = est.evaluate(input_fn=input_fn, steps=1) self.assertAllClose(eval_res['accuracy'], 1.0) @@ -118,29 +127,9 @@ class BoostedTreesClassifierTest(test_util.TensorFlowTestCase): est.train(train_input_fn, steps=num_steps) predictions = list(est.predict(input_fn=predict_input_fn)) - self.assertEquals(5, len(predictions)) # All labels are correct. - self.assertAllClose([0], predictions[0]['class_ids']) - self.assertAllClose([1], predictions[1]['class_ids']) - self.assertAllClose([1], predictions[2]['class_ids']) - self.assertAllClose([0], predictions[3]['class_ids']) - self.assertAllClose([0], predictions[4]['class_ids']) - - -class BoostedTreesRegressionTest(test_util.TensorFlowTestCase): - - def setUp(self): - self._feature_columns = { - feature_column.bucketized_column( - feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), - BUCKET_BOUNDARIES) - for i in range(NUM_FEATURES) - } - - def _assert_checkpoint(self, model_dir, expected_global_step): - self.assertEqual(expected_global_step, - checkpoint_utils.load_variable(model_dir, - ops.GraphKeys.GLOBAL_STEP)) + self.assertAllClose([[0], [1], [1], [0], [0]], + [pred['class_ids'] for pred in predictions]) def testTrainAndEvaluateRegressor(self): input_fn = _make_train_input_fn(is_classification=False) @@ -155,9 +144,10 @@ class BoostedTreesRegressionTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. 
est.train(input_fn, steps=num_steps) - self._assert_checkpoint(est.model_dir, 11) + self._assert_checkpoint( + est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10) eval_res = est.evaluate(input_fn=input_fn, steps=1) - self.assertAllClose(eval_res['average_loss'], 0.913176) + self.assertAllClose(eval_res['average_loss'], 1.008551) def testInferRegressor(self): train_input_fn = _make_train_input_fn(is_classification=False) @@ -174,16 +164,13 @@ class BoostedTreesRegressionTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. est.train(train_input_fn, steps=num_steps) - self._assert_checkpoint(est.model_dir, 6) + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) predictions = list(est.predict(input_fn=predict_input_fn)) - - self.assertEquals(5, len(predictions)) - self.assertAllClose([0.703549], predictions[0]['predictions']) - self.assertAllClose([0.266539], predictions[1]['predictions']) - self.assertAllClose([0.256479], predictions[2]['predictions']) - self.assertAllClose([1.088732], predictions[3]['predictions']) - self.assertAllClose([1.901732], predictions[4]['predictions']) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) class ModelFnTests(test_util.TensorFlowTestCase): -- GitLab From 3e1739c0c3c6cd3b74879f3e1872dd1354401e56 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 11 Apr 2018 15:37:49 -0700 Subject: [PATCH 017/434] Revealing the range of node ids in the latest layer via resource' state PiperOrigin-RevId: 192520351 --- ...tedTreesCalculateBestGainsPerFeature.pbtxt | 4 +- ...pi_def_BoostedTreesGetEnsembleStates.pbtxt | 12 +++++- .../kernels/boosted_trees/boosted_trees.proto | 4 ++ .../kernels/boosted_trees/resource_ops.cc | 12 ++++++ .../core/kernels/boosted_trees/resources.h | 20 ++++++++++ .../core/kernels/boosted_trees/stats_ops.cc | 6 +-- .../kernels/boosted_trees/training_ops.cc | 8 ++++ tensorflow/core/ops/boosted_trees_ops.cc | 2 + .../core/ops/compat/ops_history.v1.pbtxt | 4 ++ .../python/estimator/canned/boosted_trees.py | 9 ++--- .../estimator/canned/boosted_trees_test.py | 12 ++++++ .../boosted_trees/resource_ops_test.py | 31 +++++++++----- .../boosted_trees/stats_ops_test.py | 8 ++-- .../boosted_trees/training_ops_test.py | 40 +++++++++++++++++-- tensorflow/python/ops/boosted_trees_ops.py | 15 ++++--- 15 files changed, 150 insertions(+), 37 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt index b1921e3507..62876a293c 100644 --- a/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt @@ -4,7 +4,7 @@ op { in_arg { name: "node_id_range" description: <allocate_output(0, TensorShape(), &output_stamp_token_t)); @@ -110,11 +111,22 @@ class BoostedTreesGetEnsembleStatesOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_output(3, TensorShape(), &output_num_attempted_layers_t)); + OP_REQUIRES_OK(context, context->allocate_output( + 4, {2}, &output_last_layer_nodes_range_t)); output_stamp_token_t->scalar()() = tree_ensemble_resource->stamp(); output_num_trees_t->scalar()() = num_trees; 
output_num_finalized_trees_t->scalar()() = num_finalized_trees; output_num_attempted_layers_t->scalar()() = num_attempted_layers; + + int32 range_start; + int32 range_end; + tree_ensemble_resource->GetLastLayerNodesRange(&range_start, &range_end); + + output_last_layer_nodes_range_t->vec()(0) = range_start; + // For a completely empty ensemble, this will be 0. To make it a valid range + // we add this max cond. + output_last_layer_nodes_range_t->vec()(1) = std::max(1, range_end); } }; diff --git a/tensorflow/core/kernels/boosted_trees/resources.h b/tensorflow/core/kernels/boosted_trees/resources.h index c82588b950..561ca3a18a 100644 --- a/tensorflow/core/kernels/boosted_trees/resources.h +++ b/tensorflow/core/kernels/boosted_trees/resources.h @@ -93,6 +93,26 @@ class BoostedTreesEnsembleResource : public StampedResource { new_num_layers); } + void UpdateLastLayerNodesRange(const int32 node_range_start, + int32 node_range_end) const { + tree_ensemble_->mutable_growing_metadata()->set_last_layer_node_start( + node_range_start); + tree_ensemble_->mutable_growing_metadata()->set_last_layer_node_end( + node_range_end); + } + + void GetLastLayerNodesRange(int32* node_range_start, + int32* node_range_end) const { + *node_range_start = + tree_ensemble_->growing_metadata().last_layer_node_start(); + *node_range_end = tree_ensemble_->growing_metadata().last_layer_node_end(); + } + + int64 GetNumNodes(const int32 tree_id) { + DCHECK_LT(tree_id, tree_ensemble_->trees_size()); + return tree_ensemble_->trees(tree_id).nodes_size(); + } + void UpdateGrowingMetadata() const; int32 GetNumLayersAttempted() { diff --git a/tensorflow/core/kernels/boosted_trees/stats_ops.cc b/tensorflow/core/kernels/boosted_trees/stats_ops.cc index 33fdab6a86..16e65cf284 100644 --- a/tensorflow/core/kernels/boosted_trees/stats_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/stats_ops.cc @@ -42,8 +42,8 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { const Tensor* 
node_id_range_t; OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t)); const auto node_id_range = node_id_range_t->vec(); - int32 node_id_first = node_id_range(0); - int32 node_id_last = node_id_range(1); // inclusive. + const int32 node_id_first = node_id_range(0); // inclusive + const int32 node_id_last = node_id_range(1); // exclusive // stats_summary_list OpInputList stats_summary_list; OP_REQUIRES_OK(context, context->input_list("stats_summary_list", @@ -86,7 +86,7 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { std::vector output_thresholds; std::vector output_left_node_contribs; std::vector output_right_node_contribs; - for (int node_id = node_id_first; node_id <= node_id_last; ++node_id) { + for (int node_id = node_id_first; node_id < node_id_last; ++node_id) { // Calculate gains. cum_grad.clear(); cum_hess.clear(); diff --git a/tensorflow/core/kernels/boosted_trees/training_ops.cc b/tensorflow/core/kernels/boosted_trees/training_ops.cc index b9ded4054a..67cac14c52 100644 --- a/tensorflow/core/kernels/boosted_trees/training_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/training_ops.cc @@ -101,6 +101,7 @@ class BoostedTreesUpdateEnsembleOp : public OpKernel { << current_tree << " of ensemble of " << current_tree + 1 << " trees."; bool split_happened = false; + int32 node_id_start = ensemble_resource->GetNumNodes(current_tree); // Add the splits to the tree. for (auto& split_entry : best_splits) { const int32 node_id = split_entry.first; @@ -139,11 +140,15 @@ class BoostedTreesUpdateEnsembleOp : public OpKernel { right_contrib, &left_node_id, &right_node_id); split_happened = true; } + int32 node_id_end = ensemble_resource->GetNumNodes(current_tree); if (split_happened) { // Update growable tree metadata. ensemble_resource->SetNumLayersGrown(current_tree, new_num_layers); // Finalize the tree if needed. 
if (ensemble_resource->GetNumLayersGrown(current_tree) >= max_depth_) { + // If the tree is finalized, next growing will start from node 0; + node_id_start = 0; + node_id_end = 1; ensemble_resource->SetIsFinalized(current_tree, true); if (pruning_mode_ == kPostPruning) { ensemble_resource->PostPruneTree(current_tree); @@ -153,6 +158,9 @@ class BoostedTreesUpdateEnsembleOp : public OpKernel { ensemble_resource->AddNewTree(kLayerByLayerTreeWeight); } } + // If we managed to split, update the node range. If we didn't, don't + // update as we will try to split the same nodes with new instances. + ensemble_resource->UpdateLastLayerNodesRange(node_id_start, node_id_end); } } diff --git a/tensorflow/core/ops/boosted_trees_ops.cc b/tensorflow/core/ops/boosted_trees_ops.cc index 297e94655f..8af4903418 100644 --- a/tensorflow/core/ops/boosted_trees_ops.cc +++ b/tensorflow/core/ops/boosted_trees_ops.cc @@ -128,6 +128,7 @@ REGISTER_OP("BoostedTreesGetEnsembleStates") .Output("num_trees: int32") .Output("num_finalized_trees: int32") .Output("num_attempted_layers: int32") + .Output("last_layer_nodes_range: int32") .SetShapeFn([](shape_inference::InferenceContext* c) { shape_inference::ShapeHandle unused_input; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused_input)); @@ -135,6 +136,7 @@ REGISTER_OP("BoostedTreesGetEnsembleStates") c->set_output(1, c->Scalar()); c->set_output(2, c->Scalar()); c->set_output(3, c->Scalar()); + c->set_output(4, c->Vector(2)); return Status::OK(); }); diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 026bfa89cf..2f6f588d2c 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -10861,6 +10861,10 @@ op { name: "num_attempted_layers" type: DT_INT32 } + output_arg { + name: "last_layer_nodes_range" + type: DT_INT32 + } is_stateful: true } op { diff --git a/tensorflow/python/estimator/canned/boosted_trees.py 
b/tensorflow/python/estimator/canned/boosted_trees.py index c5d5455b1a..58af59dbb1 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -349,8 +349,8 @@ def _bt_model_fn( array_ops.zeros( [batch_size, head.logits_dimension], dtype=dtypes.float32)) with ops.control_dependencies([ensemble_reload]): - (stamp_token, num_trees, num_finalized_trees, - num_attempted_layers) = local_tree_ensemble.get_states() + (stamp_token, num_trees, num_finalized_trees, num_attempted_layers, + last_layer_nodes_range) = local_tree_ensemble.get_states() summary.scalar('ensemble/num_trees', num_trees) summary.scalar('ensemble/num_finalized_trees', num_finalized_trees) summary.scalar('ensemble/num_attempted_layers', num_attempted_layers) @@ -393,10 +393,7 @@ def _bt_model_fn( (node_ids_per_feature, gains_list, thresholds_list, left_node_contribs_list, right_node_contribs_list) = ( boosted_trees_ops.calculate_best_gains_per_feature( - node_id_range=array_ops.stack([ - math_ops.reduce_min(node_ids), - math_ops.reduce_max(node_ids) - ]), + node_id_range=last_layer_nodes_range, stats_summary_list=stats_summary_list, l1=tree_hparams.l1, l2=tree_hparams.l2, diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 625745a3f9..7823ef8410 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -223,6 +223,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ second_round = """ @@ -307,6 +309,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ third_round = """ @@ -407,6 +411,8 @@ class 
ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 2 num_layers_attempted: 3 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ return (first_round, second_round, third_round) @@ -444,6 +450,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ second_round = """ @@ -528,6 +536,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ third_round = """ @@ -628,6 +638,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 2 num_layers_attempted: 3 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ return (first_round, second_round, third_round) diff --git a/tensorflow/python/kernel_tests/boosted_trees/resource_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/resource_ops_test.py index a223241e89..d5f0c22d6e 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/resource_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/resource_ops_test.py @@ -36,16 +36,18 @@ class ResourceOpsTest(test_util.TensorFlowTestCase): resources.initialize_resources(resources.shared_resources()).run() stamp_token = ensemble.get_stamp_token() self.assertEqual(0, stamp_token.eval()) - (_, num_trees, num_finalized_trees, - num_attempted_layers) = ensemble.get_states() + (_, num_trees, num_finalized_trees, num_attempted_layers, + nodes_range) = ensemble.get_states() self.assertEqual(0, num_trees.eval()) self.assertEqual(0, num_finalized_trees.eval()) self.assertEqual(0, num_attempted_layers.eval()) + self.assertAllEqual([0, 1], nodes_range.eval()) def testCreateWithProto(self): with self.test_session(): ensemble_proto = boosted_trees_pb2.TreeEnsemble() - text_format.Merge(""" + text_format.Merge( + """ trees { nodes { bucketized_split { @@ 
-141,6 +143,8 @@ class ResourceOpsTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 2 num_layers_attempted: 6 + last_layer_node_start: 16 + last_layer_node_end: 19 } """, ensemble_proto) ensemble = boosted_trees_ops.TreeEnsemble( @@ -148,28 +152,31 @@ class ResourceOpsTest(test_util.TensorFlowTestCase): stamp_token=7, serialized_proto=ensemble_proto.SerializeToString()) resources.initialize_resources(resources.shared_resources()).run() - (stamp_token, num_trees, num_finalized_trees, - num_attempted_layers) = ensemble.get_states() + (stamp_token, num_trees, num_finalized_trees, num_attempted_layers, + nodes_range) = ensemble.get_states() self.assertEqual(7, stamp_token.eval()) self.assertEqual(2, num_trees.eval()) self.assertEqual(1, num_finalized_trees.eval()) self.assertEqual(6, num_attempted_layers.eval()) + self.assertAllEqual([16, 19], nodes_range.eval()) def testSerializeDeserialize(self): with self.test_session(): # Initialize. ensemble = boosted_trees_ops.TreeEnsemble('ensemble', stamp_token=5) resources.initialize_resources(resources.shared_resources()).run() - (stamp_token, num_trees, num_finalized_trees, - num_attempted_layers) = ensemble.get_states() + (stamp_token, num_trees, num_finalized_trees, num_attempted_layers, + nodes_range) = ensemble.get_states() self.assertEqual(5, stamp_token.eval()) self.assertEqual(0, num_trees.eval()) self.assertEqual(0, num_finalized_trees.eval()) self.assertEqual(0, num_attempted_layers.eval()) + self.assertAllEqual([0, 1], nodes_range.eval()) # Deserialize. 
ensemble_proto = boosted_trees_pb2.TreeEnsemble() - text_format.Merge(""" + text_format.Merge( + """ trees { nodes { bucketized_split { @@ -201,6 +208,8 @@ class ResourceOpsTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 5 + last_layer_node_start: 3 + last_layer_node_end: 7 } """, ensemble_proto) with ops.control_dependencies([ @@ -208,13 +217,15 @@ class ResourceOpsTest(test_util.TensorFlowTestCase): stamp_token=3, serialized_proto=ensemble_proto.SerializeToString()) ]): - (stamp_token, num_trees, num_finalized_trees, - num_attempted_layers) = ensemble.get_states() + (stamp_token, num_trees, num_finalized_trees, num_attempted_layers, + nodes_range) = ensemble.get_states() self.assertEqual(3, stamp_token.eval()) self.assertEqual(1, num_trees.eval()) # This reads from metadata, not really counting the layers. self.assertEqual(5, num_attempted_layers.eval()) self.assertEqual(0, num_finalized_trees.eval()) + self.assertAllEqual([3, 7], nodes_range.eval()) + # Serialize. new_ensemble_proto = boosted_trees_pb2.TreeEnsemble() diff --git a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py index a54cc43517..4d09cf94d4 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py @@ -29,7 +29,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): """Testing Gain calculation without any regularization.""" with self.test_session() as sess: max_splits = 7 - node_id_range = [1, 2] # node 1 through 2 will be processed. + node_id_range = [1, 3] # node 1 through 2 will be processed. 
stats_summary_list = [ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored @@ -76,7 +76,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): """Testing Gain calculation with L2.""" with self.test_session() as sess: max_splits = 7 - node_id_range = [1, 2] # node 1 through 2 will be processed. + node_id_range = [1, 3] # node 1 through 2 will be processed. stats_summary_list = [ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored @@ -123,7 +123,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): """Testing Gain calculation with L1.""" with self.test_session() as sess: max_splits = 7 - node_id_range = [1, 2] # node 1 through 2 will be processed. + node_id_range = [1, 3] # node 1 through 2 will be processed. stats_summary_list = [ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored @@ -173,7 +173,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): """Testing Gain calculation with L2.""" with self.test_session() as sess: max_splits = 7 - node_id_range = [1, 2] # node 1 through 2 will be processed. + node_id_range = [1, 3] # node 1 through 2 will be processed. 
stats_summary_list = [ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored diff --git a/tensorflow/python/kernel_tests/boosted_trees/training_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/training_ops_test.py index 4226ff75c2..d6c0047747 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/training_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/training_ops_test.py @@ -132,6 +132,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ self.assertEqual(new_stamp, 1) @@ -314,6 +316,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ self.assertEqual(new_stamp, 1) @@ -461,6 +465,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 2 num_layers_attempted: 2 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ self.assertEqual(new_stamp, 1) @@ -615,6 +621,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 3 + last_layer_node_end: 5 } """ self.assertEqual(new_stamp, 1) @@ -624,7 +632,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): """Test that the metadata is updated even though we can't split.""" with self.test_session() as session: tree_ensemble_config = boosted_trees_pb2.TreeEnsemble() - text_format.Merge(""" + text_format.Merge( + """ trees { nodes { bucketized_split { @@ -655,6 +664,9 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 + } """, tree_ensemble_config) @@ -685,7 +697,7 @@ class 
UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): # Expect no new splits created, but attempted (global) stats updated. Meta # data for this tree should not be updated (we didn't succeed building a - # layer. + # layer. Node ranges don't change. new_stamp, serialized = session.run(tree_ensemble.serialize()) tree_ensemble = boosted_trees_pb2.TreeEnsemble() tree_ensemble.ParseFromString(serialized) @@ -721,6 +733,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ self.assertEqual(new_stamp, 1) @@ -730,7 +744,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): """Test metadata is updated correctly when no split due to prepruning.""" with self.test_session() as session: tree_ensemble_config = boosted_trees_pb2.TreeEnsemble() - text_format.Merge(""" + text_format.Merge( + """ trees { nodes { bucketized_split { @@ -761,6 +776,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 } """, tree_ensemble_config) @@ -851,6 +868,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ self.assertEqual(new_stamp, 1) @@ -941,6 +960,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ self.assertEqual(new_stamp, 1) @@ -1046,6 +1067,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 3 + last_layer_node_end: 7 } """ self.assertEqual(new_stamp, 2) @@ -1179,6 +1202,8 @@ class 
UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 3 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ self.assertEqual(new_stamp, 3) @@ -1268,6 +1293,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 1 + last_layer_node_end: 3 } """ self.assertEqual(new_stamp, 1) @@ -1307,7 +1334,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): # Expect the ensemble to be empty as post-pruning will prune # the entire finalized tree. self.assertEqual(new_stamp, 2) - self.assertProtoEquals(""" + self.assertProtoEquals( + """ trees { nodes { leaf { @@ -1359,6 +1387,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 2 + last_layer_node_start: 0 + last_layer_node_end: 1 } """, res_ensemble) @@ -1455,6 +1485,8 @@ class UpdateTreeEnsembleOpTest(test_util.TensorFlowTestCase): growing_metadata { num_trees_attempted: 1 num_layers_attempted: 1 + last_layer_node_start: 0 + last_layer_node_end: 1 } """ self.assertEqual(new_stamp, 1) diff --git a/tensorflow/python/ops/boosted_trees_ops.py b/tensorflow/python/ops/boosted_trees_ops.py index 174d00987f..2a2bcdd9d6 100644 --- a/tensorflow/python/ops/boosted_trees_ops.py +++ b/tensorflow/python/ops/boosted_trees_ops.py @@ -115,7 +115,7 @@ class TreeEnsemble(object): def get_stamp_token(self): """Returns the current stamp token of the resource.""" - stamp_token, _, _, _ = ( + stamp_token, _, _, _, _ = ( gen_boosted_trees_ops.boosted_trees_get_ensemble_states( self.resource_handle)) return stamp_token @@ -124,17 +124,20 @@ class TreeEnsemble(object): """Returns states of the tree ensemble. Returns: - stamp_token, num_trees, num_finalized_trees, num_attempted_layers. 
+ stamp_token, num_trees, num_finalized_trees, num_attempted_layers and + range of the nodes in the latest layer. """ - stamp_token, num_trees, num_finalized_trees, num_attempted_layers = ( - gen_boosted_trees_ops.boosted_trees_get_ensemble_states( - self.resource_handle)) + (stamp_token, num_trees, num_finalized_trees, num_attempted_layers, + nodes_range) = ( + gen_boosted_trees_ops.boosted_trees_get_ensemble_states( + self.resource_handle)) # Use identity to give names. return (array_ops.identity(stamp_token, name='stamp_token'), array_ops.identity(num_trees, name='num_trees'), array_ops.identity(num_finalized_trees, name='num_finalized_trees'), array_ops.identity( - num_attempted_layers, name='num_attempted_layers')) + num_attempted_layers, name='num_attempted_layers'), + array_ops.identity(nodes_range, name='last_layer_nodes_range')) def serialize(self): """Serializes the ensemble into proto and returns the serialized proto. -- GitLab From 33c737b70d42e05cabc43b4c6e778e988b6d0a9e Mon Sep 17 00:00:00 2001 From: Younghee Kwon Date: Wed, 11 Apr 2018 16:59:45 -0700 Subject: [PATCH 018/434] boosted_trees: make sure ensemble deserialization happens for the non-TRAIN modes too. PiperOrigin-RevId: 192532297 --- .../python/estimator/canned/boosted_trees.py | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 58af59dbb1..0ecc8c7089 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -317,27 +317,28 @@ def _bt_model_fn( head.logits_dimension) # Create Ensemble resources. 
- if is_single_machine: - tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) - local_tree_ensemble = tree_ensemble - ensemble_reload = control_flow_ops.no_op() - else: - tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) - with ops.device(worker_device): - local_tree_ensemble = boosted_trees_ops.TreeEnsemble( - name=name + '_local', is_local=True) - # TODO(soroush): Do partial updates if this becomes a bottleneck. - ensemble_reload = local_tree_ensemble.deserialize( - *tree_ensemble.serialize()) - + tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) # Create logits. if mode != model_fn.ModeKeys.TRAIN: logits = boosted_trees_ops.predict( - tree_ensemble_handle=local_tree_ensemble.resource_handle, + # For non-TRAIN mode, ensemble doesn't change after initialization, + # so no local copy is needed; using tree_ensemble directly. + tree_ensemble_handle=tree_ensemble.resource_handle, bucketized_features=input_feature_list, logits_dimension=head.logits_dimension, max_depth=tree_hparams.max_depth) else: + if is_single_machine: + local_tree_ensemble = tree_ensemble + ensemble_reload = control_flow_ops.no_op() + else: + # Have a local copy of ensemble for the distributed setting. + with ops.device(worker_device): + local_tree_ensemble = boosted_trees_ops.TreeEnsemble( + name=name + '_local', is_local=True) + # TODO(soroush): Do partial updates if this becomes a bottleneck. + ensemble_reload = local_tree_ensemble.deserialize( + *tree_ensemble.serialize()) if cache: cached_tree_ids, cached_node_ids, cached_logits = cache.lookup() else: -- GitLab From 7810e47e7d7c90b0e3df8e251964a38ebff9d978 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Mon, 16 Apr 2018 11:56:46 -0700 Subject: [PATCH 019/434] Merge pull request #18568 from case540/enable_git_tag_override Add ability to override git tag in __git_version__ string. 
--- tensorflow/tensorflow.bzl | 2 +- tensorflow/tools/git/gen_git_source.py | 37 +++++++++++++++++++++----- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 528f811b40..b286834ded 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1704,7 +1704,7 @@ def tf_version_info_genrule(): ], outs=["util/version_info.cc"], cmd= - "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\"", + "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\" --git_tag_override=$${GIT_TAG_OVERRIDE:-}", local=1, tools=[clean_dep("//tensorflow/tools/git:gen_git_source.py")],) diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py index cbcdbf5b80..db2580755b 100755 --- a/tensorflow/tools/git/gen_git_source.py +++ b/tensorflow/tools/git/gen_git_source.py @@ -139,7 +139,7 @@ def configure(src_base_path, gen_path, debug=False): print("gen_git_source.py: spec is %r" % spec) -def get_git_version(git_base_path): +def get_git_version(git_base_path, git_tag_override): """Get the git version from the repository. This function runs `git describe ...` in the path given as `git_base_path`. @@ -152,6 +152,9 @@ def get_git_version(git_base_path): Args: git_base_path: where the .git directory is located + git_tag_override: Override the value for the git tag. This is useful for + releases where we want to build the release before the git tag is + created. 
Returns: A bytestring representing the git version """ @@ -161,6 +164,14 @@ def get_git_version(git_base_path): "git", str("--git-dir=%s/.git" % git_base_path), str("--work-tree=" + git_base_path), "describe", "--long", "--tags" ]).strip()) + if git_tag_override: + split_val = val.split("-") + if len(split_val) != 3: + raise Exception( + ("Expected git version in format 'TAG-COMMITS AFTER TAG-HASH' " + "but got '%s'") % val) + split_val[0] = git_tag_override + val = bytes("-".join(split_val)) return val if val else unknown_label except subprocess.CalledProcessError: return unknown_label @@ -197,7 +208,7 @@ const int tf_monolithic_build() { open(filename, "w").write(contents) -def generate(arglist): +def generate(arglist, git_tag_override=None): """Generate version_info.cc as given `destination_file`. Args: @@ -217,6 +228,10 @@ def generate(arglist): `ref_symlink` is unused in this script but passed, because the build system uses that file to detect when commits happen. + git_tag_override: Override the value for the git tag. This is useful for + releases where we want to build the release before the git tag is + created. + Raises: RuntimeError: If ./configure needs to be run, RuntimeError will be raised. """ @@ -234,11 +249,11 @@ def generate(arglist): raise RuntimeError( "Run ./configure again, branch was '%s' but is now '%s'" % (old_branch, new_branch)) - git_version = get_git_version(data["path"]) + git_version = get_git_version(data["path"], git_tag_override) write_version_info(dest_file, git_version) -def raw_generate(output_file): +def raw_generate(output_file, git_tag_override=None): """Simple generator used for cmake/make build systems. This does not create any symlinks. It requires the build system @@ -246,9 +261,12 @@ def raw_generate(output_file): Args: output_file: Output filename for the version info cc + git_tag_override: Override the value for the git tag. 
This is useful for + releases where we want to build the release before the git tag is + created. """ - git_version = get_git_version(".") + git_version = get_git_version(".", git_tag_override) write_version_info(output_file, git_version) @@ -270,6 +288,11 @@ parser.add_argument( "--gen_root_path", type=str, help="Root path to place generated git files (created by --configure).") +parser.add_argument( + "--git_tag_override", type=str, + help="Override git tag value in the __git_version__ string. Useful when " + "creating release builds before the release tag is created.") + parser.add_argument( "--generate", type=str, @@ -288,9 +311,9 @@ if args.configure is not None: raise RuntimeError("Must pass --gen_root_path arg when running --configure") configure(args.configure, args.gen_root_path, debug=args.debug) elif args.generate is not None: - generate(args.generate) + generate(args.generate, args.git_tag_override) elif args.raw_generate is not None: - raw_generate(args.raw_generate) + raw_generate(args.raw_generate, args.git_tag_override) else: raise RuntimeError("--configure or --generate or --raw_generate " "must be used") -- GitLab From e9e5356b206e9399b5d06b618fc77f460e9613bf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 10:03:37 -0700 Subject: [PATCH 020/434] Enable the Grappler arithmetic optimizer by default in Python tests. 
PiperOrigin-RevId: 190787954 --- tensorflow/python/framework/test_util.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 990fa429a1..bf00fa6439 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -974,8 +974,6 @@ class TensorFlowTestCase(googletest.TestCase): config.graph_options.optimizer_options.opt_level = -1 config.graph_options.rewrite_options.constant_folding = ( rewriter_config_pb2.RewriterConfig.OFF) - config.graph_options.rewrite_options.arithmetic_optimization = ( - rewriter_config_pb2.RewriterConfig.OFF) return config if graph is None: -- GitLab From 9e4818375f3853c1a8cdd18fe22d1b1f447cfaef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 13 Apr 2018 10:30:32 -0700 Subject: [PATCH 021/434] Disable x * x -> square(x) Grappler rewrite for complex types unless the op is on CPU. Square is not registered for complex types on GPU, and doing so produces a crash with CUDA_ILLEGAL_INSTRUCTION when running it on open source ubuntu.
PiperOrigin-RevId: 192788160 --- .../optimizers/arithmetic_optimizer.cc | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index fa0f7c1c6e..a8fa4a10cb 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1732,13 +1732,22 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( if (node->op() == "Mul" && node->input(0) == node->input(1) && !OptimizedNodeExists(*node, "square")) { - NodeDef* new_square_node = AddNode(*node, "square", /*copy_node=*/true); - new_square_node->set_op("Square"); - for (int i = 1; i < new_square_node->input_size(); ++i) { - new_square_node->set_input(i - 1, new_square_node->input(i)); + const DataType type = GetDataTypeFromAttr(*node, "T"); + bool is_complex = (type == DT_COMPLEX64) || (type == DT_COMPLEX128); + string dontcare; + string device; + bool is_on_cpu = + DeviceNameUtils::SplitDeviceName(node->device(), &dontcare, &device) && + str_util::StrContains(device, DEVICE_CPU); + if (!is_complex || is_on_cpu) { + NodeDef* new_square_node = AddNode(*node, "square", /*copy_node=*/true); + new_square_node->set_op("Square"); + for (int i = 1; i < new_square_node->input_size(); ++i) { + new_square_node->set_input(i - 1, new_square_node->input(i)); + } + new_square_node->mutable_input()->RemoveLast(); + return new_square_node->name(); } - new_square_node->mutable_input()->RemoveLast(); - return new_square_node->name(); } if (IsAggregate(*node) && NumNonControlInputs(*node) > 0) { -- GitLab From b358c9932e0d2f50e50baa5f1a9441e3594244c4 Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Wed, 11 Apr 2018 15:20:11 -0700 Subject: [PATCH 022/434] GCS Filesystem should not cache checkpoint file as we need to read the updated checkpoints from the contents. 
PiperOrigin-RevId: 192517819 (cherry picked from commit 079d63d59b75bdfd25f7371efda25ec5f6739b78) --- .../core/platform/cloud/gcs_file_system.cc | 8 ++++ .../platform/cloud/gcs_file_system_test.cc | 48 +++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 3c0dc13d75..6ed1d5dad2 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -301,6 +301,14 @@ class GcsRandomAccessFile : public RandomAccessFile { TF_RETURN_IF_ERROR(file_block_cache_->Read(filename_, offset, n, scratch, &bytes_transferred)); *result = StringPiece(scratch, bytes_transferred); + string checkpoint_ending = "/checkpoint"; + // Check if the file is the checkpoint file as we should not be caching + // that, as its contents are updated and used for iterating checkpoints. + if (std::equal(checkpoint_ending.rbegin(), checkpoint_ending.rend(), + filename_.rbegin())) { + // Remove the checkpoint file from the cache + file_block_cache_->RemoveFile(filename_); + } if (bytes_transferred < n) { // This is not an error per se. The RandomAccessFile interface expects // that Read returns OutOfRange if fewer bytes were read than requested.
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index 2fbde9b6a7..e9eca04fef 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -198,6 +198,54 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) { EXPECT_EQ("0123", result); } +TEST(GcsFileSystemTest, NewRandomAccessFile_CheckpointFile_WithBlockCache) { + // Our underlying file in this test changes as new data comes in + std::vector requests( + {new FakeHttpRequest( + "Uri: https://storage.googleapis.com/bucket/checkpoint\n" + "Auth Token: fake_token\n" + "Range: 0-8\n" + "Timeouts: 5 1 20\n", + "012345678"), + new FakeHttpRequest( + "Uri: https://storage.googleapis.com/bucket/checkpoint\n" + "Auth Token: fake_token\n" + "Range: 0-8\n" + "Timeouts: 5 1 20\n", + "abcdefghi")}); + GcsFileSystem fs( + std::unique_ptr(new FakeAuthProvider), + std::unique_ptr( + new FakeHttpRequestFactory(&requests)), + 9 /* block size */, 18 /* max bytes */, 0 /* max staleness */, + 0 /* stat cache max age */, 0 /* stat cache max entries */, + 0 /* matching paths cache max age */, + 0 /* matching paths cache max entries */, 0 /* initial retry delay */, + kTestTimeoutConfig, nullptr /* gcs additional header */); + + char scratch[100]; + StringPiece result; + { + // We are instantiating this in an enclosed scope to make sure after the + // unique ptr goes out of scope, we can still access result. + std::unique_ptr file; + TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/checkpoint", &file)); + + // Read the first chunk. The cache will be populated with the first block of + // 9 bytes. + scratch[5] = 'x'; + TF_EXPECT_OK(file->Read(0, 4, &result, scratch)); + EXPECT_EQ("0123", result); + EXPECT_EQ(scratch[5], 'x'); // Make sure we only copied 4 bytes. 
+ + // The second chunk should not be in cache so we make a new request + // As the checkpoint file should not be cached + TF_EXPECT_OK(file->Read(0, 4, &result, scratch)); + EXPECT_EQ("abcd", result); + EXPECT_EQ(scratch[5], 'x'); // Make sure we only copied 4 bytes. + } +} + TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_Flush) { // Our underlying file in this test is a 15 byte file with contents // "0123456789abcde". -- GitLab From 2b47b7f374612c34985aad4adedfa8d9a5b2440c Mon Sep 17 00:00:00 2001 From: Junpeng Lao Date: Tue, 17 Apr 2018 13:28:02 +0200 Subject: [PATCH 023/434] Unify style in bijector --- .../contrib/distributions/python/ops/bijectors/invert.py | 4 ++-- .../python/ops/bijectors/masked_autoregressive.py | 4 ++-- .../contrib/distributions/python/ops/bijectors/permute.py | 4 ++-- .../contrib/distributions/python/ops/bijectors/real_nvp.py | 4 ++-- .../contrib/distributions/python/ops/bijectors/reshape.py | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/invert.py b/tensorflow/contrib/distributions/python/ops/bijectors/invert.py index 1904239a0e..84a3289ba2 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/invert.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/invert.py @@ -18,14 +18,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.ops.distributions import bijector as bijector_lib +from tensorflow.python.ops.distributions import bijector __all__ = [ "Invert", ] -class Invert(bijector_lib.Bijector): +class Invert(bijector.Bijector): """Bijector which inverts another Bijector. 
Example Use: [ExpGammaDistribution (see Background & Context)]( diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py index ef56cf6ddd..83667b0e80 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py @@ -32,7 +32,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import template as template_ops from tensorflow.python.ops import variable_scope as variable_scope_lib -from tensorflow.python.ops.distributions import bijector as bijector_lib +from tensorflow.python.ops.distributions import bijector __all__ = [ @@ -42,7 +42,7 @@ __all__ = [ ] -class MaskedAutoregressiveFlow(bijector_lib.Bijector): +class MaskedAutoregressiveFlow(bijector.Bijector): """Affine MaskedAutoregressiveFlow bijector for vector-valued events. The affine autoregressive flow [(Papamakarios et al., 2016)][3] provides a diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/permute.py b/tensorflow/contrib/distributions/python/ops/bijectors/permute.py index 4978167803..12a16a3f2b 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/permute.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/permute.py @@ -28,7 +28,7 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops -from tensorflow.python.ops.distributions import bijector as bijector_lib +from tensorflow.python.ops.distributions import bijector __all__ = [ @@ -36,7 +36,7 @@ __all__ = [ ] -class Permute(bijector_lib.Bijector): +class Permute(bijector.Bijector): """Permutes the rightmost dimension of a `Tensor`. 
```python diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py b/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py index f09ab21bce..66e8a5b9b3 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py @@ -25,7 +25,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import template as template_ops -from tensorflow.python.ops.distributions import bijector as bijector_lib +from tensorflow.python.ops.distributions import bijector __all__ = [ @@ -34,7 +34,7 @@ __all__ = [ ] -class RealNVP(bijector_lib.Bijector): +class RealNVP(bijector.Bijector): """RealNVP "affine coupling layer" for vector-valued events. Real NVP models a normalizing flow on a `D`-dimensional distribution via a diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py index f21b982ba6..5497c422e4 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py @@ -28,7 +28,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops.distributions import bijector as bijector_lib +from tensorflow.python.ops.distributions import bijector __all__ = [ @@ -44,7 +44,7 @@ def _ndims_from_shape(shape): return array_ops.shape(shape)[0] -class Reshape(bijector_lib.Bijector): +class Reshape(bijector.Bijector): """Reshapes the `event_shape` of a `Tensor`. 
The semantics generally follow that of `tf.reshape()`, with -- GitLab From 9620211c64e95818d59ad6991059a4a66b6a064d Mon Sep 17 00:00:00 2001 From: Junpeng Lao Date: Tue, 17 Apr 2018 17:19:16 +0200 Subject: [PATCH 024/434] minor format clean up --- .../contrib/distributions/python/ops/bijectors/weibull.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py b/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py index 39129cd22c..a22560fe80 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py @@ -128,7 +128,7 @@ class Weibull(bijector.Bijector): return x is_valid = check_ops.assert_non_negative( x, - message="Forward transformation input must be at least {}.".format(0)) + message="Forward transformation input must be at least 0.") return control_flow_ops.with_dependencies([is_valid], x) def _maybe_assert_valid_y(self, y): -- GitLab From fe1753af198dbfc64f7ab623865dd91cbdda8eeb Mon Sep 17 00:00:00 2001 From: Junpeng Lao Date: Tue, 17 Apr 2018 18:51:01 +0200 Subject: [PATCH 025/434] WIP implemented Ordered bijector --- .../kernel_tests/bijectors/ordered_test.py | 111 +++++++++++++++++ .../python/ops/bijectors/__init__.py | 2 + .../python/ops/bijectors/ordered.py | 114 ++++++++++++++++++ 3 files changed, 227 insertions(+) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/ordered.py diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py new file mode 100644 index 0000000000..1bcbfed6c3 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py @@ -0,0 +1,111 @@ +# Copyright 2016 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops.bijectors.ordered import Ordered +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops.distributions.bijector_test_util import assert_bijective_and_finite +from tensorflow.python.platform import test + + +rng = np.random.RandomState(42) + + +class OrderedBijectorTest(test.TestCase): + """Tests correctness of the ordered transformation.""" + + def testBijectorVector(self): + with self.test_session(): + ordered = Ordered() + self.assertEqual("ordered", ordered.name) + x = np.log([[2., 3, 4], [4., 8, 12]]) + y = [[0.2, 0.3, 0.4, 0.1], [0.16, 0.32, 0.48, 0.04]] + self.assertAllClose(y, ordered.forward(x).eval()) + self.assertAllClose(x, ordered.inverse(y).eval()) + self.assertAllClose( + -np.sum(np.log(y), axis=1), + ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(), + atol=0., + rtol=1e-7) + self.assertAllClose( + -ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(), + ordered.forward_log_det_jacobian(x, event_ndims=1).eval(), + atol=0., + rtol=1e-7) + + def 
testBijectorUnknownShape(self): + with self.test_session(): + ordered = Ordered() + self.assertEqual("ordered", ordered.name) + x = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32) + real_x = np.log([[2., 3, 4], [4., 8, 12]]) + y = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32) + real_y = [[0.2, 0.3, 0.4, 0.1], [0.16, 0.32, 0.48, 0.04]] + self.assertAllClose(real_y, ordered.forward(x).eval( + feed_dict={x: real_x})) + self.assertAllClose(real_x, ordered.inverse(y).eval( + feed_dict={y: real_y})) + self.assertAllClose( + -np.sum(np.log(real_y), axis=1), + ordered.inverse_log_det_jacobian(y, event_ndims=1).eval( + feed_dict={y: real_y}), + atol=0., + rtol=1e-7) + self.assertAllClose( + -ordered.inverse_log_det_jacobian(y, event_ndims=1).eval( + feed_dict={y: real_y}), + ordered.forward_log_det_jacobian(x, event_ndims=1).eval( + feed_dict={x: real_x}), + atol=0., + rtol=1e-7) + + def testShapeGetters(self): + with self.test_session(): + x = tensor_shape.TensorShape([4]) + y = tensor_shape.TensorShape([5]) + bijector = Ordered(validate_args=True) + self.assertAllEqual(y, bijector.forward_event_shape(x)) + self.assertAllEqual(y.as_list(), + bijector.forward_event_shape_tensor( + x.as_list()).eval()) + self.assertAllEqual(x, bijector.inverse_event_shape(y)) + self.assertAllEqual(x.as_list(), + bijector.inverse_event_shape_tensor( + y.as_list()).eval()) + + def testBijectiveAndFinite(self): + with self.test_session(): + ordered = Ordered() + x = np.linspace(-50, 50, num=10).reshape(5, 2).astype(np.float32) + # Make y values on the simplex with a wide range. 
+ y_0 = np.ones(5).astype(np.float32) + y_1 = (1e-5 * rng.rand(5)).astype(np.float32) + y_2 = (1e1 * rng.rand(5)).astype(np.float32) + y = np.array([y_0, y_1, y_2]) + y /= y.sum(axis=0) + y = y.T # y.shape = [5, 3] + assert_bijective_and_finite(ordered, x, y, event_ndims=1) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py index babce80396..51478dbeff 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py @@ -30,6 +30,7 @@ @@Invert @@Kumaraswamy @@MaskedAutoregressiveFlow +@@Ordered @@Permute @@PowerTransform @@RealNVP @@ -67,6 +68,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.inline import * from tensorflow.contrib.distributions.python.ops.bijectors.invert import * from tensorflow.contrib.distributions.python.ops.bijectors.kumaraswamy import * from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import * +from tensorflow.contrib.distributions.python.ops.bijectors.ordered import * from tensorflow.contrib.distributions.python.ops.bijectors.permute import * from tensorflow.contrib.distributions.python.ops.bijectors.power_transform import * from tensorflow.contrib.distributions.python.ops.bijectors.real_nvp import * diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py b/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py new file mode 100644 index 0000000000..ec8f660144 --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py @@ -0,0 +1,114 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Ordered bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops.distributions import bijector + + +__all__ = [ + "Ordered", +] + + +class Ordered(bijector.Bijector): + """Bijector which maps a tensor x_k that has increasing elements in the last + dimension to an unconstrained tensor y_k. 
+ + On the last dimension of the tensor, Ordered bijector performs: + `y[0] = x[0]` + `y[1:] = math_ops.log(x[1:] - x[:-1])` + + Example Use: + + ```python + bijector.Ordered().inverse(tf.log([2, 3, 4])) + # Result: [0.6931472, 3.6931472, 7.693147] + + bijector.Ordered().forward([0.6931472, 3.6931472, 7.693147]) + # Result: tf.log([2, 3, 4]) + ``` + """ + + def __init__(self, + validate_args=False, + name="ordered"): + self._graph_parents = [] + self._name = name + super(Ordered, self).__init__( + forward_min_event_ndims=1, + validate_args=validate_args, + name=name) + + def _forward_event_shape(self, input_shape): + if input_shape.ndims is None or input_shape[-1] is None: + return input_shape + return tensor_shape.TensorShape([input_shape[-1]]) + + def _forward_event_shape_tensor(self, input_shape): + return (input_shape[-1])[..., array_ops.newaxis] + + def _inverse_event_shape(self, output_shape): + if output_shape.ndims is None or output_shape[-1] is None: + return output_shape + if output_shape[-1] <= 1: + raise ValueError("output_shape[-1] = %d <= 1" % output_shape[-1]) + return tensor_shape.TensorShape([output_shape[-1]]) + + def _inverse_event_shape_tensor(self, output_shape): + if self.validate_args: + # It is not possible for a negative shape so we need only check <= 1.
+ is_greater_one = check_ops.assert_greater( + output_shape[-1], 1, message="Need last dimension greater than 1.") + output_shape = control_flow_ops.with_dependencies( + [is_greater_one], output_shape) + return (output_shape[-1])[..., array_ops.newaxis] + + def _forward(self, x): + x = self._maybe_assert_valid_x(x) + y0 = array_ops.expand_dims(x[..., 0], -1) + yk = math_ops.log(x[..., 1:] - x[..., :-1]) + y = array_ops.concat([y0, yk], axis=-1) + return y + + def _inverse(self, y): + x0 = array_ops.expand_dims(y[..., 0], -1) + xk = math_ops.exp(y[..., 1:]) + x = array_ops.concat([x0, xk], axis=-1) + return math_ops.cumsum(x, axis=-1) + + def _inverse_log_det_jacobian(self, y): + return math_ops.reduce_sum(y[..., 1:], axis=-1) + + def _forward_log_det_jacobian(self, x): + pass + + def _maybe_assert_valid_x(self, x): + if not self.validate_args: + return x + is_valid = check_ops.is_strictly_increasing( + x, + message="Forward transformation input must be strictly increasing.") + return control_flow_ops.with_dependencies([is_valid], x) \ No newline at end of file -- GitLab From ba1ea3ff90ee44c8e82a1fb9ba757d798b55d144 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 16 Apr 2018 11:24:43 -0700 Subject: [PATCH 026/434] Porting tests for the `decode_proto` and `encode_proto` to OS. 
PiperOrigin-RevId: 193070420 --- tensorflow/contrib/BUILD | 1 + tensorflow/contrib/__init__.py | 1 + tensorflow/contrib/cmake/tf_python.cmake | 6 +- tensorflow/contrib/proto/BUILD | 16 + .../contrib/proto/python/kernel_tests/BUILD | 86 +++++ .../proto/python/kernel_tests/build_defs.bzl | 89 ++++++ .../kernel_tests/decode_proto_fail_test.py | 68 ++++ .../kernel_tests/decode_proto_op_test.py | 300 ++++++++++++++++++ .../kernel_tests/encode_proto_op_test.py | 180 +++++++++++ .../python/kernel_tests/minmax.TestCase.pbtxt | 161 ++++++++++ .../python/kernel_tests/nested.TestCase.pbtxt | 16 + .../kernel_tests/optional.TestCase.pbtxt | 20 ++ .../promote_unsigned.TestCase.pbtxt | 21 ++ .../python/kernel_tests/ragged.TestCase.pbtxt | 32 ++ .../kernel_tests/shaped_batch.TestCase.pbtxt | 62 ++++ .../python/kernel_tests/simple.TestCase.pbtxt | 21 ++ .../proto/python/kernel_tests/test_case.py | 35 ++ .../python/kernel_tests/test_example.proto | 149 +++++++++ tensorflow/tools/pip_package/BUILD | 1 + 19 files changed, 1263 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/proto/python/kernel_tests/BUILD create mode 100644 tensorflow/contrib/proto/python/kernel_tests/build_defs.bzl create mode 100644 tensorflow/contrib/proto/python/kernel_tests/decode_proto_fail_test.py create mode 100644 tensorflow/contrib/proto/python/kernel_tests/decode_proto_op_test.py create mode 100644 tensorflow/contrib/proto/python/kernel_tests/encode_proto_op_test.py create mode 100644 tensorflow/contrib/proto/python/kernel_tests/minmax.TestCase.pbtxt create mode 100644 tensorflow/contrib/proto/python/kernel_tests/nested.TestCase.pbtxt create mode 100644 tensorflow/contrib/proto/python/kernel_tests/optional.TestCase.pbtxt create mode 100644 tensorflow/contrib/proto/python/kernel_tests/promote_unsigned.TestCase.pbtxt create mode 100644 tensorflow/contrib/proto/python/kernel_tests/ragged.TestCase.pbtxt create mode 100644 
tensorflow/contrib/proto/python/kernel_tests/shaped_batch.TestCase.pbtxt create mode 100644 tensorflow/contrib/proto/python/kernel_tests/simple.TestCase.pbtxt create mode 100644 tensorflow/contrib/proto/python/kernel_tests/test_case.py create mode 100644 tensorflow/contrib/proto/python/kernel_tests/test_example.proto diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 9bef0d8b61..ae68f4aec4 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -77,6 +77,7 @@ py_library( "//tensorflow/contrib/optimizer_v2:optimizer_v2_py", "//tensorflow/contrib/periodic_resample:init_py", "//tensorflow/contrib/predictor", + "//tensorflow/contrib/proto", "//tensorflow/contrib/quantization:quantization_py", "//tensorflow/contrib/quantize:quantize_graph", "//tensorflow/contrib/autograph", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index aaddb06fa0..e27ece8fa5 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -64,6 +64,7 @@ from tensorflow.contrib import nn from tensorflow.contrib import opt from tensorflow.contrib import periodic_resample from tensorflow.contrib import predictor +from tensorflow.contrib import proto from tensorflow.contrib import quantization from tensorflow.contrib import quantize from tensorflow.contrib import recurrent diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index ded15b4b66..21f59d2563 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -330,8 +330,10 @@ GENERATE_PYTHON_OP_LIB("ctc_ops") GENERATE_PYTHON_OP_LIB("cudnn_rnn_ops") GENERATE_PYTHON_OP_LIB("data_flow_ops") GENERATE_PYTHON_OP_LIB("dataset_ops") -GENERATE_PYTHON_OP_LIB("decode_proto_ops") -GENERATE_PYTHON_OP_LIB("encode_proto_ops") +GENERATE_PYTHON_OP_LIB("decode_proto_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/proto/python/ops/gen_decode_proto_op.py) 
+GENERATE_PYTHON_OP_LIB("encode_proto_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/proto/python/ops/gen_encode_proto_op.py) GENERATE_PYTHON_OP_LIB("image_ops") GENERATE_PYTHON_OP_LIB("io_ops") GENERATE_PYTHON_OP_LIB("linalg_ops") diff --git a/tensorflow/contrib/proto/BUILD b/tensorflow/contrib/proto/BUILD index 046652cbc5..3e9b1a0b8d 100644 --- a/tensorflow/contrib/proto/BUILD +++ b/tensorflow/contrib/proto/BUILD @@ -4,6 +4,8 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) +load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") + py_library( name = "proto", srcs = [ @@ -14,3 +16,17 @@ py_library( "//tensorflow/contrib/proto/python/ops:encode_proto_op_py", ], ) + +py_library( + name = "proto_pip", + data = [ + "//tensorflow/contrib/proto/python/kernel_tests:test_messages", + ] + if_static( + [], + otherwise = ["//tensorflow/contrib/proto/python/kernel_tests:libtestexample.so"], + ), + deps = [ + ":proto", + "//tensorflow/contrib/proto/python/kernel_tests:py_test_deps", + ], +) diff --git a/tensorflow/contrib/proto/python/kernel_tests/BUILD b/tensorflow/contrib/proto/python/kernel_tests/BUILD new file mode 100644 index 0000000000..a380a131f8 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/BUILD @@ -0,0 +1,86 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +# Much of the work in this BUILD file actually happens in the corresponding +# build_defs.bzl, which creates an individual testcase for each example .pbtxt +# file in this directory. +# +load(":build_defs.bzl", "decode_proto_test_suite") +load(":build_defs.bzl", "encode_proto_test_suite") + +# This expands to a tf_py_test for each test file. +# It defines the test_suite :decode_proto_tests. +decode_proto_test_suite( + name = "decode_proto_tests", + examples = glob(["*.pbtxt"]), +) + +# This expands to a tf_py_test for each test file.
+# It defines the test_suite :encode_proto_tests. +encode_proto_test_suite( + name = "encode_proto_tests", + examples = glob(["*.pbtxt"]), +) + +# Below here are tests that are not tied to an example text proto. +filegroup( + name = "test_messages", + srcs = glob(["*.pbtxt"]), +) + +load("//tensorflow:tensorflow.bzl", "tf_py_test") +load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object") +load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") +load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") + +tf_py_test( + name = "decode_proto_fail_test", + size = "small", + srcs = ["decode_proto_fail_test.py"], + additional_deps = [ + ":py_test_deps", + "//third_party/py/numpy", + "//tensorflow/contrib/proto:proto", + "//tensorflow/contrib/proto/python/ops:decode_proto_op_py", + ], + data = if_static( + [], + otherwise = [":libtestexample.so"], + ), + tags = [ + "no_pip", # TODO(b/78026780) + "no_windows", # TODO(b/78028010) + ], +) + +py_library( + name = "test_case", + srcs = ["test_case.py"], + deps = ["//tensorflow/python:client_testlib"], +) + +py_library( + name = "py_test_deps", + deps = [ + ":test_case", + ":test_example_proto_py", + ], +) + +tf_proto_library( + name = "test_example_proto", + srcs = ["test_example.proto"], + cc_api_version = 2, + protodeps = ["//tensorflow/core:protos_all"], +) + +tf_cc_shared_object( + name = "libtestexample.so", + linkstatic = 1, + deps = [ + ":test_example_proto_cc", + ], +) diff --git a/tensorflow/contrib/proto/python/kernel_tests/build_defs.bzl b/tensorflow/contrib/proto/python/kernel_tests/build_defs.bzl new file mode 100644 index 0000000000..f425601691 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/build_defs.bzl @@ -0,0 +1,89 @@ +"""BUILD rules for generating file-driven proto test cases. + +The decode_proto_test_suite() and encode_proto_test_suite() rules take a list +of text protos and generate a tf_py_test() for each one.
+""" + +load("//tensorflow:tensorflow.bzl", "tf_py_test") +load("//tensorflow:tensorflow.bzl", "register_extension_info") +load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") + +def _test_name(test, path): + return "%s_%s_test" % (test, path.split("/")[-1].split(".")[0]) + +def decode_proto_test_suite(name, examples): + """Build the decode_proto py_test for each test filename.""" + for test_filename in examples: + tf_py_test( + name = _test_name("decode_proto", test_filename), + srcs = ["decode_proto_op_test.py"], + size = "small", + data = [test_filename] + if_static( + [], + otherwise = [":libtestexample.so"], + ), + main = "decode_proto_op_test.py", + args = [ + "--message_text_file=\"%s/%s\"" % (native.package_name(), test_filename), + ], + additional_deps = [ + ":py_test_deps", + "//third_party/py/numpy", + "//tensorflow/contrib/proto:proto", + "//tensorflow/contrib/proto/python/ops:decode_proto_op_py", + ], + tags = [ + "no_pip", # TODO(b/78026780) + "no_windows", # TODO(b/78028010) + ], + ) + native.test_suite( + name = name, + tests = [":" + _test_name("decode_proto", test_filename) + for test_filename in examples], + ) + +def encode_proto_test_suite(name, examples): + """Build the encode_proto py_test for each test filename.""" + for test_filename in examples: + tf_py_test( + name = _test_name("encode_proto", test_filename), + srcs = ["encode_proto_op_test.py"], + size = "small", + data = [test_filename] + if_static( + [], + otherwise = [":libtestexample.so"], + ), + main = "encode_proto_op_test.py", + args = [ + "--message_text_file=\"%s/%s\"" % (native.package_name(), test_filename), + ], + additional_deps = [ + ":py_test_deps", + "//third_party/py/numpy", + "//tensorflow/contrib/proto:proto", + "//tensorflow/contrib/proto/python/ops:decode_proto_op_py", + "//tensorflow/contrib/proto/python/ops:encode_proto_op_py", + ], + tags = [ + "no_pip", # TODO(b/78026780) + "no_windows", # TODO(b/78028010) + ], + ) + native.test_suite( + 
name = name, + tests = [":" + _test_name("encode_proto", test_filename) + for test_filename in examples], + ) + +register_extension_info( + extension_name = "decode_proto_test_suite", + label_regex_map = { + "deps": "deps:decode_example_.*", + }) + +register_extension_info( + extension_name = "encode_proto_test_suite", + label_regex_map = { + "deps": "deps:encode_example_.*", + }) diff --git a/tensorflow/contrib/proto/python/kernel_tests/decode_proto_fail_test.py b/tensorflow/contrib/proto/python/kernel_tests/decode_proto_fail_test.py new file mode 100644 index 0000000000..5298342ee7 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/decode_proto_fail_test.py @@ -0,0 +1,68 @@ +# ============================================================================= +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# Python3 preparedness imports. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.proto.python.kernel_tests import test_case +from tensorflow.contrib.proto.python.ops import decode_proto_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.platform import test + + +class DecodeProtoFailTest(test_case.ProtoOpTestCase): + """Test failure cases for DecodeToProto.""" + + def _TestCorruptProtobuf(self, sanitize): + """Test failure cases for DecodeToProto.""" + + # The goal here is to check the error reporting. + # Testing against a variety of corrupt protobufs is + # done by fuzzing. + corrupt_proto = 'This is not a binary protobuf' + + # Numpy silently truncates the strings if you don't specify dtype=object. + batch = np.array(corrupt_proto, dtype=object) + msg_type = 'tensorflow.contrib.proto.TestCase' + field_names = ['sizes'] + field_types = [dtypes.int32] + + with self.test_session() as sess: + ctensor, vtensor = decode_proto_op.decode_proto( + batch, + message_type=msg_type, + field_names=field_names, + output_types=field_types, + sanitize=sanitize) + with self.assertRaisesRegexp(errors.DataLossError, + 'Unable to parse binary protobuf' + '|Failed to consume entire buffer'): + _ = sess.run([ctensor] + vtensor) + + def testCorrupt(self): + self._TestCorruptProtobuf(sanitize=False) + + def testSanitizerCorrupt(self): + self._TestCorruptProtobuf(sanitize=True) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/proto/python/kernel_tests/decode_proto_op_test.py b/tensorflow/contrib/proto/python/kernel_tests/decode_proto_op_test.py new file mode 100644 index 0000000000..d1c13c82bc --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/decode_proto_op_test.py @@ -0,0 +1,300 @@ +# ============================================================================= +# Copyright 2018 The TensorFlow 
Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""Table-driven test for decode_proto op. + +This test is run once with each of the *.TestCase.pbtxt files +in the test directory. +""" +# Python3 preparedness imports. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from google.protobuf import text_format + +from tensorflow.contrib.proto.python.kernel_tests import test_case +from tensorflow.contrib.proto.python.kernel_tests import test_example_pb2 +from tensorflow.contrib.proto.python.ops import decode_proto_op +from tensorflow.python.framework import dtypes +from tensorflow.python.platform import flags +from tensorflow.python.platform import test + +FLAGS = flags.FLAGS + +flags.DEFINE_string('message_text_file', None, + 'A file containing a text serialized TestCase protobuf.') + + +class DecodeProtoOpTest(test_case.ProtoOpTestCase): + + def _compareValues(self, fd, vs, evs): + """Compare lists/arrays of field values.""" + + if len(vs) != len(evs): + self.fail('Field %s decoded %d outputs, expected %d' % + (fd.name, len(vs), len(evs))) + for i, ev in enumerate(evs): + # Special case fuzzy match for float32. TensorFlow seems to mess with + # MAX_FLT slightly and the test doesn't work otherwise. + # TODO(nix): ask on TF list about why MAX_FLT doesn't pass through. 
+ if fd.cpp_type == fd.CPPTYPE_FLOAT: + # Numpy isclose() is better than assertIsClose() which uses an absolute + # value comparison. + self.assertTrue( + np.isclose(vs[i], ev), 'expected %r, actual %r' % (ev, vs[i])) + elif fd.cpp_type == fd.CPPTYPE_STRING: + # In Python3 string tensor values will be represented as bytes, so we + # reencode the proto values to match that. + self.assertEqual(vs[i], ev.encode('ascii')) + else: + # Doubles and other types pass through unscathed. + self.assertEqual(vs[i], ev) + + def _compareRepeatedPrimitiveValue(self, batch_shape, sizes, fields, + field_dict): + """Compare protos of type RepeatedPrimitiveValue. + + Args: + batch_shape: the shape of the input tensor of serialized messages. + sizes: int matrix of repeat counts returned by decode_proto + fields: list of test_example_pb2.FieldSpec (types and expected values) + field_dict: map from field names to decoded numpy tensors of values + """ + + # Check that expected values match. + for field in fields: + values = field_dict[field.name] + self.assertEqual(dtypes.as_dtype(values.dtype), field.dtype) + + fd = field.expected.DESCRIPTOR.fields_by_name[field.name] + + # Values has the same shape as the input plus an extra + # dimension for repeats. + self.assertEqual(list(values.shape)[:-1], batch_shape) + + # Nested messages are represented as TF strings, requiring + # some special handling. + if field.name == 'message_value': + vs = [] + for buf in values.flat: + msg = test_example_pb2.PrimitiveValue() + msg.ParseFromString(buf) + vs.append(msg) + evs = getattr(field.expected, field.name) + if len(vs) != len(evs): + self.fail('Field %s decoded %d outputs, expected %d' % + (fd.name, len(vs), len(evs))) + for v, ev in zip(vs, evs): + self.assertEqual(v, ev) + continue + + # This can be a little confusing. For testing we are using + # RepeatedPrimitiveValue in two ways: it's the proto that we + # decode for testing, and it's used in the expected value as a + # union type. 
The two cases are slightly different: this is the + # second case. + # We may be fetching the uint64_value from the test proto, but + # in the expected proto we store it in the int64_value field + # because TensorFlow doesn't support unsigned int64. + tf_type_to_primitive_value_field = { + dtypes.float32: + 'float_value', + dtypes.float64: + 'double_value', + dtypes.int32: + 'int32_value', + dtypes.uint8: + 'uint8_value', + dtypes.int8: + 'int8_value', + dtypes.string: + 'string_value', + dtypes.int64: + 'int64_value', + dtypes.bool: + 'bool_value', + # Unhandled TensorFlow types: + # DT_INT16 DT_COMPLEX64 DT_QINT8 DT_QUINT8 DT_QINT32 + # DT_BFLOAT16 DT_QINT16 DT_QUINT16 DT_UINT16 + } + tf_field_name = tf_type_to_primitive_value_field.get(field.dtype) + if tf_field_name is None: + self.fail('Unhandled tensorflow type %d' % field.dtype) + + self._compareValues(fd, values.flat, + getattr(field.expected, tf_field_name)) + + def _runDecodeProtoTests(self, fields, case_sizes, batch_shape, batch, + message_type, message_format, sanitize, + force_disordered=False): + """Run decode tests on a batch of messages. + + Args: + fields: list of test_example_pb2.FieldSpec (types and expected values) + case_sizes: expected sizes array + batch_shape: the shape of the input tensor of serialized messages + batch: list of serialized messages + message_type: descriptor name for messages + message_format: format of messages, 'text' or 'binary' + sanitize: whether to sanitize binary protobuf inputs + force_disordered: whether to force fields encoded out of order. + """ + + if force_disordered: + # Exercise code path that handles out-of-order fields by prepending extra + # fields with tag numbers higher than any real field. Note that this won't + # work with sanitization because that forces reserialization using a + # trusted decoder and encoder. 
+ assert not sanitize + extra_fields = test_example_pb2.ExtraFields() + extra_fields.string_value = 'IGNORE ME' + extra_fields.bool_value = False + extra_msg = extra_fields.SerializeToString() + batch = [extra_msg + msg for msg in batch] + + # Numpy silently truncates the strings if you don't specify dtype=object. + batch = np.array(batch, dtype=object) + batch = np.reshape(batch, batch_shape) + + field_names = [f.name for f in fields] + output_types = [f.dtype for f in fields] + + with self.test_session() as sess: + sizes, vtensor = decode_proto_op.decode_proto( + batch, + message_type=message_type, + field_names=field_names, + output_types=output_types, + message_format=message_format, + sanitize=sanitize) + + vlist = sess.run([sizes] + vtensor) + sizes = vlist[0] + # Values is a list of tensors, one for each field. + value_tensors = vlist[1:] + + # Check that the repeat sizes are correct. + self.assertTrue( + np.all(np.array(sizes.shape) == batch_shape + [len(field_names)])) + + # Check that the decoded sizes match the expected sizes. 
+ self.assertEqual(len(sizes.flat), len(case_sizes)) + self.assertTrue( + np.all(sizes.flat == np.array( + case_sizes, dtype=np.int32))) + + field_dict = dict(zip(field_names, value_tensors)) + + self._compareRepeatedPrimitiveValue(batch_shape, sizes, fields, + field_dict) + + def testBinary(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + batch = [primitive.SerializeToString() for primitive in case.primitive] + self._runDecodeProtoTests( + case.field, + case.sizes, + list(case.shape), + batch, + 'tensorflow.contrib.proto.RepeatedPrimitiveValue', + 'binary', + sanitize=False) + + def testBinaryDisordered(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + batch = [primitive.SerializeToString() for primitive in case.primitive] + self._runDecodeProtoTests( + case.field, + case.sizes, + list(case.shape), + batch, + 'tensorflow.contrib.proto.RepeatedPrimitiveValue', + 'binary', + sanitize=False, + force_disordered=True) + + def testPacked(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + # Now try with the packed serialization. + # We test the packed representations by loading the same test cases + # using PackedPrimitiveValue instead of RepeatedPrimitiveValue. + # To do this we rely on the text format being the same for packed and + # unpacked fields, and reparse the test message using the packed version + # of the proto. + packed_batch = [ + # Note: float_format='.17g' is necessary to ensure preservation of + # doubles and floats in text format. 
+ text_format.Parse( + text_format.MessageToString( + primitive, float_format='.17g'), + test_example_pb2.PackedPrimitiveValue()).SerializeToString() + for primitive in case.primitive + ] + + self._runDecodeProtoTests( + case.field, + case.sizes, + list(case.shape), + packed_batch, + 'tensorflow.contrib.proto.PackedPrimitiveValue', + 'binary', + sanitize=False) + + def testText(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + # Note: float_format='.17g' is necessary to ensure preservation of + # doubles and floats in text format. + text_batch = [ + text_format.MessageToString( + primitive, float_format='.17g') for primitive in case.primitive + ] + + self._runDecodeProtoTests( + case.field, + case.sizes, + list(case.shape), + text_batch, + 'tensorflow.contrib.proto.RepeatedPrimitiveValue', + 'text', + sanitize=False) + + def testSanitizerGood(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + batch = [primitive.SerializeToString() for primitive in case.primitive] + self._runDecodeProtoTests( + case.field, + case.sizes, + list(case.shape), + batch, + 'tensorflow.contrib.proto.RepeatedPrimitiveValue', + 'binary', + sanitize=True) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/proto/python/kernel_tests/encode_proto_op_test.py b/tensorflow/contrib/proto/python/kernel_tests/encode_proto_op_test.py new file mode 100644 index 0000000000..30e58e6336 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/encode_proto_op_test.py @@ -0,0 +1,180 @@ +# ============================================================================= +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""Table-driven test for encode_proto op. + +This test is run once with each of the *.TestCase.pbtxt files +in the test directory. + +It tests that encode_proto is a lossless inverse of decode_proto +(for the specified fields). +""" +# Python3 readiness boilerplate +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from google.protobuf import text_format + +from tensorflow.contrib.proto.python.kernel_tests import test_case +from tensorflow.contrib.proto.python.kernel_tests import test_example_pb2 +from tensorflow.contrib.proto.python.ops import decode_proto_op +from tensorflow.contrib.proto.python.ops import encode_proto_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import flags +from tensorflow.python.platform import test + +FLAGS = flags.FLAGS + +flags.DEFINE_string('message_text_file', None, + 'A file containing a text serialized TestCase protobuf.') + + +class EncodeProtoOpTest(test_case.ProtoOpTestCase): + + def testBadInputs(self): + # Invalid field name + with self.test_session(): + with self.assertRaisesOpError('Unknown field: non_existent_field'): + encode_proto_op.encode_proto( + sizes=[[1]], + values=[np.array([[0.0]], dtype=np.int32)], + message_type='tensorflow.contrib.proto.RepeatedPrimitiveValue', + field_names=['non_existent_field']).eval() + + # Incorrect types. 
+ with self.test_session(): + with self.assertRaisesOpError( + 'Incompatible type for field double_value.'): + encode_proto_op.encode_proto( + sizes=[[1]], + values=[np.array([[0.0]], dtype=np.int32)], + message_type='tensorflow.contrib.proto.RepeatedPrimitiveValue', + field_names=['double_value']).eval() + + # Incorrect shapes of sizes. + with self.test_session(): + with self.assertRaisesOpError( + r'sizes should be batch_size \+ \[len\(field_names\)\]'): + sizes = array_ops.placeholder(dtypes.int32) + values = array_ops.placeholder(dtypes.float64) + encode_proto_op.encode_proto( + sizes=sizes, + values=[values], + message_type='tensorflow.contrib.proto.RepeatedPrimitiveValue', + field_names=['double_value']).eval(feed_dict={ + sizes: [[[0, 0]]], + values: [[0.0]] + }) + + # Inconsistent shapes of values. + with self.test_session(): + with self.assertRaisesOpError( + 'Values must match up to the last dimension'): + sizes = array_ops.placeholder(dtypes.int32) + values1 = array_ops.placeholder(dtypes.float64) + values2 = array_ops.placeholder(dtypes.int32) + (encode_proto_op.encode_proto( + sizes=[[1, 1]], + values=[values1, values2], + message_type='tensorflow.contrib.proto.RepeatedPrimitiveValue', + field_names=['double_value', 'int32_value']).eval(feed_dict={ + values1: [[0.0]], + values2: [[0], [0]] + })) + + def _testRoundtrip(self, in_bufs, message_type, fields): + + field_names = [f.name for f in fields] + out_types = [f.dtype for f in fields] + + with self.test_session() as sess: + sizes, field_tensors = decode_proto_op.decode_proto( + in_bufs, + message_type=message_type, + field_names=field_names, + output_types=out_types) + + out_tensors = encode_proto_op.encode_proto( + sizes, + field_tensors, + message_type=message_type, + field_names=field_names) + + out_bufs, = sess.run([out_tensors]) + + # Check that the re-encoded tensor has the same shape. + self.assertEqual(in_bufs.shape, out_bufs.shape) + + # Compare the input and output. 
+ for in_buf, out_buf in zip(in_bufs.flat, out_bufs.flat): + in_obj = test_example_pb2.RepeatedPrimitiveValue() + in_obj.ParseFromString(in_buf) + + out_obj = test_example_pb2.RepeatedPrimitiveValue() + out_obj.ParseFromString(out_buf) + + # Check that the deserialized objects are identical. + self.assertEqual(in_obj, out_obj) + + # Check that the input and output serialized messages are identical. + # If we fail here, there is a difference in the serialized + # representation but the new serialization still parses. This could + # be harmless (a change in map ordering?) or it could be bad (e.g. + # loss of packing in the encoding). + self.assertEqual(in_buf, out_buf) + + def testRoundtrip(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + in_bufs = [primitive.SerializeToString() for primitive in case.primitive] + + # np.array silently truncates strings if you don't specify dtype=object. + in_bufs = np.reshape(np.array(in_bufs, dtype=object), list(case.shape)) + return self._testRoundtrip( + in_bufs, 'tensorflow.contrib.proto.RepeatedPrimitiveValue', case.field) + + def testRoundtripPacked(self): + with open(FLAGS.message_text_file, 'r') as fp: + case = text_format.Parse(fp.read(), test_example_pb2.TestCase()) + + # Now try with the packed serialization. + # We test the packed representations by loading the same test cases + # using PackedPrimitiveValue instead of RepeatedPrimitiveValue. + # To do this we rely on the text format being the same for packed and + # unpacked fields, and reparse the test message using the packed version + # of the proto. + in_bufs = [ + # Note: float_format='.17g' is necessary to ensure preservation of + # doubles and floats in text format. 
+ text_format.Parse( + text_format.MessageToString( + primitive, float_format='.17g'), + test_example_pb2.PackedPrimitiveValue()).SerializeToString() + for primitive in case.primitive + ] + + # np.array silently truncates strings if you don't specify dtype=object. + in_bufs = np.reshape(np.array(in_bufs, dtype=object), list(case.shape)) + return self._testRoundtrip( + in_bufs, 'tensorflow.contrib.proto.PackedPrimitiveValue', case.field) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/proto/python/kernel_tests/minmax.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/minmax.TestCase.pbtxt new file mode 100644 index 0000000000..b170f89c0f --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/minmax.TestCase.pbtxt @@ -0,0 +1,161 @@ +primitive { + double_value: -1.7976931348623158e+308 + double_value: 2.2250738585072014e-308 + double_value: 1.7976931348623158e+308 + float_value: -3.402823466e+38 + float_value: 1.175494351e-38 + float_value: 3.402823466e+38 + int64_value: -9223372036854775808 + int64_value: 9223372036854775807 + uint64_value: 0 + uint64_value: 18446744073709551615 + int32_value: -2147483648 + int32_value: 2147483647 + fixed64_value: 0 + fixed64_value: 18446744073709551615 + fixed32_value: 0 + fixed32_value: 4294967295 + bool_value: false + bool_value: true + string_value: "" + string_value: "I refer to the infinite." 
+ uint32_value: 0 + uint32_value: 4294967295 + sfixed32_value: -2147483648 + sfixed32_value: 2147483647 + sfixed64_value: -9223372036854775808 + sfixed64_value: 9223372036854775807 + sint32_value: -2147483648 + sint32_value: 2147483647 + sint64_value: -9223372036854775808 + sint64_value: 9223372036854775807 +} +shape: 1 +sizes: 3 +sizes: 3 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +sizes: 2 +field { + name: "double_value" + dtype: DT_DOUBLE + expected { + double_value: -1.7976931348623158e+308 + double_value: 2.2250738585072014e-308 + double_value: 1.7976931348623158e+308 + } +} +field { + name: "float_value" + dtype: DT_FLOAT + expected { + float_value: -3.402823466e+38 + float_value: 1.175494351e-38 + float_value: 3.402823466e+38 + } +} +field { + name: "int64_value" + dtype: DT_INT64 + expected { + int64_value: -9223372036854775808 + int64_value: 9223372036854775807 + } +} +field { + name: "uint64_value" + dtype: DT_INT64 + expected { + int64_value: 0 + int64_value: -1 + } +} +field { + name: "int32_value" + dtype: DT_INT32 + expected { + int32_value: -2147483648 + int32_value: 2147483647 + } +} +field { + name: "fixed64_value" + dtype: DT_INT64 + expected { + int64_value: 0 + int64_value: -1 # unsigned is 18446744073709551615 + } +} +field { + name: "fixed32_value" + dtype: DT_INT32 + expected { + int32_value: 0 + int32_value: -1 # unsigned is 4294967295 + } +} +field { + name: "bool_value" + dtype: DT_BOOL + expected { + bool_value: false + bool_value: true + } +} +field { + name: "string_value" + dtype: DT_STRING + expected { + string_value: "" + string_value: "I refer to the infinite." 
+ } +} +field { + name: "uint32_value" + dtype: DT_INT32 + expected { + int32_value: 0 + int32_value: -1 # unsigned is 4294967295 + } +} +field { + name: "sfixed32_value" + dtype: DT_INT32 + expected { + int32_value: -2147483648 + int32_value: 2147483647 + } +} +field { + name: "sfixed64_value" + dtype: DT_INT64 + expected { + int64_value: -9223372036854775808 + int64_value: 9223372036854775807 + } +} +field { + name: "sint32_value" + dtype: DT_INT32 + expected { + int32_value: -2147483648 + int32_value: 2147483647 + } +} +field { + name: "sint64_value" + dtype: DT_INT64 + expected { + int64_value: -9223372036854775808 + int64_value: 9223372036854775807 + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/nested.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/nested.TestCase.pbtxt new file mode 100644 index 0000000000..c664e52851 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/nested.TestCase.pbtxt @@ -0,0 +1,16 @@ +primitive { + message_value { + double_value: 23.5 + } +} +shape: 1 +sizes: 1 +field { + name: "message_value" + dtype: DT_STRING + expected { + message_value { + double_value: 23.5 + } + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/optional.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/optional.TestCase.pbtxt new file mode 100644 index 0000000000..125651d7ea --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/optional.TestCase.pbtxt @@ -0,0 +1,20 @@ +primitive { + bool_value: true +} +shape: 1 +sizes: 1 +sizes: 0 +field { + name: "bool_value" + dtype: DT_BOOL + expected { + bool_value: true + } +} +field { + name: "double_value" + dtype: DT_DOUBLE + expected { + double_value: 0.0 + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/promote_unsigned.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/promote_unsigned.TestCase.pbtxt new file mode 100644 index 0000000000..db7555bf2d --- /dev/null +++ 
b/tensorflow/contrib/proto/python/kernel_tests/promote_unsigned.TestCase.pbtxt @@ -0,0 +1,21 @@ +primitive { + fixed32_value: 4294967295 + uint32_value: 4294967295 +} +shape: 1 +sizes: 1 +sizes: 1 +field { + name: "fixed32_value" + dtype: DT_INT64 + expected { + int64_value: 4294967295 + } +} +field { + name: "uint32_value" + dtype: DT_INT64 + expected { + int64_value: 4294967295 + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/ragged.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/ragged.TestCase.pbtxt new file mode 100644 index 0000000000..61c7ac53f7 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/ragged.TestCase.pbtxt @@ -0,0 +1,32 @@ +primitive { + double_value: 23.5 + double_value: 123.0 + bool_value: true +} +primitive { + double_value: 3.1 + bool_value: false +} +shape: 2 +sizes: 2 +sizes: 1 +sizes: 1 +sizes: 1 +field { + name: "double_value" + dtype: DT_DOUBLE + expected { + double_value: 23.5 + double_value: 123.0 + double_value: 3.1 + double_value: 0.0 + } +} +field { + name: "bool_value" + dtype: DT_BOOL + expected { + bool_value: true + bool_value: false + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/shaped_batch.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/shaped_batch.TestCase.pbtxt new file mode 100644 index 0000000000..f4828076d5 --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/shaped_batch.TestCase.pbtxt @@ -0,0 +1,62 @@ +primitive { + double_value: 23.5 + bool_value: true +} +primitive { + double_value: 44.0 + bool_value: false +} +primitive { + double_value: 3.14159 + bool_value: true +} +primitive { + double_value: 1.414 + bool_value: true +} +primitive { + double_value: -32.2 + bool_value: false +} +primitive { + double_value: 0.0001 + bool_value: true +} +shape: 3 +shape: 2 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +sizes: 1 +field { + name: "double_value" + dtype: DT_DOUBLE + 
expected { + double_value: 23.5 + double_value: 44.0 + double_value: 3.14159 + double_value: 1.414 + double_value: -32.2 + double_value: 0.0001 + } +} +field { + name: "bool_value" + dtype: DT_BOOL + expected { + bool_value: true + bool_value: false + bool_value: true + bool_value: true + bool_value: false + bool_value: true + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/simple.TestCase.pbtxt b/tensorflow/contrib/proto/python/kernel_tests/simple.TestCase.pbtxt new file mode 100644 index 0000000000..dc20ac147b --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/simple.TestCase.pbtxt @@ -0,0 +1,21 @@ +primitive { + double_value: 23.5 + bool_value: true +} +shape: 1 +sizes: 1 +sizes: 1 +field { + name: "double_value" + dtype: DT_DOUBLE + expected { + double_value: 23.5 + } +} +field { + name: "bool_value" + dtype: DT_BOOL + expected { + bool_value: true + } +} diff --git a/tensorflow/contrib/proto/python/kernel_tests/test_case.py b/tensorflow/contrib/proto/python/kernel_tests/test_case.py new file mode 100644 index 0000000000..b95202c5df --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/test_case.py @@ -0,0 +1,35 @@ +# ============================================================================= +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""Test case base for testing proto operations.""" + +# Python3 preparedness imports. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import ctypes as ct +import os + +from tensorflow.python.platform import test + + +class ProtoOpTestCase(test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + super(ProtoOpTestCase, self).__init__(methodName) + lib = os.path.join(os.path.dirname(__file__), 'libtestexample.so') + if os.path.isfile(lib): + ct.cdll.LoadLibrary(lib) diff --git a/tensorflow/contrib/proto/python/kernel_tests/test_example.proto b/tensorflow/contrib/proto/python/kernel_tests/test_example.proto new file mode 100644 index 0000000000..dc495034ff --- /dev/null +++ b/tensorflow/contrib/proto/python/kernel_tests/test_example.proto @@ -0,0 +1,149 @@ +// Test description and protos to work with it. +// +// Many of the protos in this file are for unit tests that haven't been written yet. + +syntax = "proto2"; + +import "tensorflow/core/framework/types.proto"; + +package tensorflow.contrib.proto; + +// A TestCase holds a proto and a bunch of assertions +// about how it should decode. +message TestCase { + // A batch of primitives to be serialized and decoded. + repeated RepeatedPrimitiveValue primitive = 1; + // The shape of the batch. + repeated int32 shape = 2; + // Expected sizes for each field. + repeated int32 sizes = 3; + // Expected values for each field. + repeated FieldSpec field = 4; +}; + +// FieldSpec describes the expected output for a single field. 
+message FieldSpec { + optional string name = 1; + optional tensorflow.DataType dtype = 2; + optional RepeatedPrimitiveValue expected = 3; +}; + +message TestValue { + optional PrimitiveValue primitive_value = 1; + optional EnumValue enum_value = 2; + optional MessageValue message_value = 3; + optional RepeatedMessageValue repeated_message_value = 4; + optional RepeatedPrimitiveValue repeated_primitive_value = 6; +} + +message PrimitiveValue { + optional double double_value = 1; + optional float float_value = 2; + optional int64 int64_value = 3; + optional uint64 uint64_value = 4; + optional int32 int32_value = 5; + optional fixed64 fixed64_value = 6; + optional fixed32 fixed32_value = 7; + optional bool bool_value = 8; + optional string string_value = 9; + optional bytes bytes_value = 12; + optional uint32 uint32_value = 13; + optional sfixed32 sfixed32_value = 15; + optional sfixed64 sfixed64_value = 16; + optional sint32 sint32_value = 17; + optional sint64 sint64_value = 18; +} + +// NOTE: This definition must be kept in sync with PackedPrimitiveValue. +message RepeatedPrimitiveValue { + repeated double double_value = 1; + repeated float float_value = 2; + repeated int64 int64_value = 3; + repeated uint64 uint64_value = 4; + repeated int32 int32_value = 5; + repeated fixed64 fixed64_value = 6; + repeated fixed32 fixed32_value = 7; + repeated bool bool_value = 8; + repeated string string_value = 9; + repeated bytes bytes_value = 12; + repeated uint32 uint32_value = 13; + repeated sfixed32 sfixed32_value = 15; + repeated sfixed64 sfixed64_value = 16; + repeated sint32 sint32_value = 17; + repeated sint64 sint64_value = 18; + repeated PrimitiveValue message_value = 19; +} + +// A PackedPrimitiveValue looks exactly the same as a RepeatedPrimitiveValue +// in the text format, but the binary serialization is different. +// We test the packed representations by loading the same test cases +// using this definition instead of RepeatedPrimitiveValue.
+// NOTE: This definition must be kept in sync with RepeatedPrimitiveValue +// in every way except the packed=true declaration. +message PackedPrimitiveValue { + repeated double double_value = 1 [packed = true]; + repeated float float_value = 2 [packed = true]; + repeated int64 int64_value = 3 [packed = true]; + repeated uint64 uint64_value = 4 [packed = true]; + repeated int32 int32_value = 5 [packed = true]; + repeated fixed64 fixed64_value = 6 [packed = true]; + repeated fixed32 fixed32_value = 7 [packed = true]; + repeated bool bool_value = 8 [packed = true]; + repeated string string_value = 9; + repeated bytes bytes_value = 12; + repeated uint32 uint32_value = 13 [packed = true]; + repeated sfixed32 sfixed32_value = 15 [packed = true]; + repeated sfixed64 sfixed64_value = 16 [packed = true]; + repeated sint32 sint32_value = 17 [packed = true]; + repeated sint64 sint64_value = 18 [packed = true]; + repeated PrimitiveValue message_value = 19; +} + +message EnumValue { + enum Color { + RED = 0; + ORANGE = 1; + YELLOW = 2; + GREEN = 3; + BLUE = 4; + INDIGO = 5; + VIOLET = 6; + }; + optional Color enum_value = 14; + repeated Color repeated_enum_value = 15; +} + + +message InnerMessageValue { + optional float float_value = 2; + repeated bytes bytes_values = 8; +} + +message MiddleMessageValue { + repeated int32 int32_values = 5; + optional InnerMessageValue message_value = 11; + optional uint32 uint32_value = 13; +} + +message MessageValue { + optional double double_value = 1; + optional MiddleMessageValue message_value = 11; +} + +message RepeatedMessageValue { + message NestedMessageValue { + optional float float_value = 2; + repeated bytes bytes_values = 8; + } + + repeated NestedMessageValue message_values = 11; +} + +// Message containing fields with field numbers higher than any field above. An +// instance of this message is prepended to each binary message in the test to +// exercise the code path that handles fields encoded out of order of field +// number. 
+message ExtraFields { + optional string string_value = 1776; + optional bool bool_value = 1777; +} diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 376644718f..a0bae23a7c 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -74,6 +74,7 @@ COMMON_PIP_DEPS = [ "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", "//tensorflow/contrib/nn:nn_py", "//tensorflow/contrib/predictor:predictor_pip", + "//tensorflow/contrib/proto:proto_pip", "//tensorflow/contrib/receptive_field:receptive_field_pip", "//tensorflow/contrib/session_bundle:session_bundle_pip", "//tensorflow/contrib/signal:signal_py", -- GitLab From d995be2debded727f2b99bb87c0d209604a5bb4b Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 16 Apr 2018 14:47:31 -0700 Subject: [PATCH 027/434] Porting tests for `rpc_op` to OS. PiperOrigin-RevId: 193102564 --- tensorflow/contrib/BUILD | 1 + tensorflow/contrib/__init__.py | 1 + tensorflow/contrib/cmake/tf_python.cmake | 3 +- tensorflow/contrib/rpc/BUILD | 16 + .../contrib/rpc/python/kernel_tests/BUILD | 80 +++++ .../rpc/python/kernel_tests/rpc_op_test.py | 71 ++++ .../python/kernel_tests/rpc_op_test_base.py | 336 ++++++++++++++++++ .../kernel_tests/rpc_op_test_servicer.py | 101 ++++++ .../python/kernel_tests/test_example.proto | 171 +++++++++ .../core/platform/default/build_config.bzl | 86 ++++- tensorflow/tools/pip_package/BUILD | 1 + tensorflow/workspace.bzl | 4 + 12 files changed, 867 insertions(+), 4 deletions(-) create mode 100644 tensorflow/contrib/rpc/python/kernel_tests/BUILD create mode 100644 tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py create mode 100644 tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py create mode 100644 tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_servicer.py create mode 100644 tensorflow/contrib/rpc/python/kernel_tests/test_example.proto diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 
ae68f4aec4..7e47516550 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -87,6 +87,7 @@ py_library( "//tensorflow/contrib/remote_fused_graph/pylib:remote_fused_graph_ops_py", "//tensorflow/contrib/resampler:resampler_py", "//tensorflow/contrib/rnn:rnn_py", + "//tensorflow/contrib/rpc", "//tensorflow/contrib/saved_model:saved_model_py", "//tensorflow/contrib/seq2seq:seq2seq_py", "//tensorflow/contrib/signal:signal_py", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index e27ece8fa5..36cc5144d0 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -71,6 +71,7 @@ from tensorflow.contrib import recurrent from tensorflow.contrib import reduce_slice_ops from tensorflow.contrib import resampler from tensorflow.contrib import rnn +from tensorflow.contrib import rpc from tensorflow.contrib import saved_model from tensorflow.contrib import seq2seq from tensorflow.contrib import signal diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 21f59d2563..f6aaf41f73 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -347,7 +347,8 @@ GENERATE_PYTHON_OP_LIB("random_ops") GENERATE_PYTHON_OP_LIB("remote_fused_graph_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/remote_fused_graph/pylib/python/ops/gen_remote_fused_graph_ops.py) GENERATE_PYTHON_OP_LIB("resource_variable_ops") -GENERATE_PYTHON_OP_LIB("rpc_ops") +GENERATE_PYTHON_OP_LIB("rpc_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rpc/python/ops/gen_rpc_op.py) GENERATE_PYTHON_OP_LIB("script_ops") GENERATE_PYTHON_OP_LIB("sdca_ops") GENERATE_PYTHON_OP_LIB("set_ops") diff --git a/tensorflow/contrib/rpc/BUILD b/tensorflow/contrib/rpc/BUILD index 597f18c771..dbd311a276 100644 --- a/tensorflow/contrib/rpc/BUILD +++ b/tensorflow/contrib/rpc/BUILD @@ -4,6 +4,8 @@ licenses(["notice"]) # Apache 2.0 
exports_files(["LICENSE"]) +load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") + py_library( name = "rpc", srcs = [ @@ -11,3 +13,17 @@ py_library( ], deps = ["//tensorflow/contrib/rpc/python/ops:rpc_op_py"], ) + +py_library( + name = "rpc_pip", + data = if_static( + [], + otherwise = ["//tensorflow/contrib/rpc/python/kernel_tests:libtestexample.so"], + ), + deps = [ + ":rpc", + "//tensorflow/contrib/rpc/python/kernel_tests:py_test_deps", + "//tensorflow/contrib/rpc/python/kernel_tests:rpc_op_test_base", + "//tensorflow/contrib/rpc/python/kernel_tests:rpc_op_test_servicer", + ], +) diff --git a/tensorflow/contrib/rpc/python/kernel_tests/BUILD b/tensorflow/contrib/rpc/python/kernel_tests/BUILD new file mode 100644 index 0000000000..2311c15a68 --- /dev/null +++ b/tensorflow/contrib/rpc/python/kernel_tests/BUILD @@ -0,0 +1,80 @@ +# TODO(b/76425722): Port everything in here to OS (currently excluded). + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "tf_py_test") +load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object") +load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") +load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") +# Placeholder for loading internal BUILD rule. 
+ +tf_proto_library( + name = "test_example_proto", + srcs = ["test_example.proto"], + has_services = 1, + cc_api_version = 2, + protodeps = ["//tensorflow/core:protos_all"], +) + +py_library( + name = "py_test_deps", + deps = [":test_example_proto_py"], +) + +py_library( + name = "rpc_op_test_base", + srcs = ["rpc_op_test_base.py"], + deps = [ + ":test_example_proto_py", + "//tensorflow/contrib/proto", + "//tensorflow/contrib/rpc", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//third_party/py/numpy", + ], +) + +py_library( + name = "rpc_op_test_servicer", + srcs = ["rpc_op_test_servicer.py"], + deps = [ + ":py_test_deps", + ":rpc_op_test_base", + "//tensorflow/core:protos_all_py", + "//third_party/py/numpy", + ], +) + +tf_cc_shared_object( + name = "libtestexample.so", + linkstatic = 1, + deps = [ + ":test_example_proto_cc", + ], +) + +tf_py_test( + name = "rpc_op_test", + size = "small", + srcs = ["rpc_op_test.py"], + additional_deps = [ + ":py_test_deps", + ":rpc_op_test_base", + ":rpc_op_test_servicer", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:client_testlib", + ], + data = if_static( + [], + otherwise = [":libtestexample.so"], + ), + tags = [ + "no_pip", # TODO(b/78026780) + "no_windows", # TODO(b/78028010) + ], +) diff --git a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py new file mode 100644 index 0000000000..e2e0dbc7a2 --- /dev/null +++ b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py @@ -0,0 +1,71 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""Tests for RpcOp.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import ctypes as ct +import os + +import grpc +from grpc.framework.foundation import logging_pool +import portpicker + +from tensorflow.contrib.rpc.python.kernel_tests import rpc_op_test_base +from tensorflow.contrib.rpc.python.kernel_tests import rpc_op_test_servicer +from tensorflow.contrib.rpc.python.kernel_tests import test_example_pb2_grpc +from tensorflow.python.platform import test + + +class RpcOpTest(test.TestCase, rpc_op_test_base.RpcOpTestBase): + _protocol = 'grpc' + + invalid_method_string = 'Method not found' + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + super(RpcOpTest, self).__init__(methodName) + lib = os.path.join(os.path.dirname(__file__), 'libtestexample.so') + if os.path.isfile(lib): + ct.cdll.LoadLibrary(lib) + + def get_method_name(self, suffix): + return '/tensorflow.contrib.rpc.TestCaseService/%s' % suffix + + def setUp(self): + super(RpcOpTest, self).setUp() + + service_port = portpicker.pick_unused_port() + + server = grpc.server(logging_pool.pool(max_workers=25)) + servicer = rpc_op_test_servicer.RpcOpTestServicer() + test_example_pb2_grpc.add_TestCaseServiceServicer_to_server( + servicer, server) + self._address = 'localhost:%d' % service_port + server.add_insecure_port(self._address) + server.start() + self._server = server + + def tearDown(self): + # TODO(ebrevdo): Figure out 
why this sometimes times out. + # self._service.ExitLoop() + # self._service_thread.join() + # self._server.stop() + super(RpcOpTest, self).tearDown() + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py new file mode 100644 index 0000000000..89f3ee1a1c --- /dev/null +++ b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py @@ -0,0 +1,336 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""Base class for RpcOp tests.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +import numpy as np + +from tensorflow.contrib.proto.python.ops import decode_proto_op +from tensorflow.contrib.proto.python.ops import encode_proto_op +from tensorflow.contrib.rpc.python.kernel_tests import test_example_pb2 +from tensorflow.contrib.rpc.python.ops import rpc_op +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors + +__all__ = ['I_WARNED_YOU', 'RpcOpTestBase'] + +I_WARNED_YOU = 'I warned you!' 
+ + +class RpcOpTestBase(object): + # pylint: disable=missing-docstring,invalid-name + """Base class for RpcOp tests.""" + + def get_method_name(self, suffix): + raise NotImplementedError + + def rpc(self, *args, **kwargs): + return rpc_op.rpc(*args, protocol=self._protocol, **kwargs) + + def try_rpc(self, *args, **kwargs): + return rpc_op.try_rpc(*args, protocol=self._protocol, **kwargs) + + def testScalarHostPortRpc(self): + with self.test_session() as sess: + request_tensors = ( + test_example_pb2.TestCase(shape=[1, 2, 3]).SerializeToString()) + response_tensors = self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) + self.assertEqual(response_tensors.shape, ()) + response_values = sess.run(response_tensors) + response_message = test_example_pb2.TestCase() + self.assertTrue(response_message.ParseFromString(response_values)) + self.assertAllEqual([2, 3, 4], response_message.shape) + + def testScalarHostPortTryRpc(self): + with self.test_session() as sess: + request_tensors = ( + test_example_pb2.TestCase(shape=[1, 2, 3]).SerializeToString()) + response_tensors, status_code, status_message = self.try_rpc( + method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) + self.assertEqual(status_code.shape, ()) + self.assertEqual(status_message.shape, ()) + self.assertEqual(response_tensors.shape, ()) + response_values, status_code_values, status_message_values = ( + sess.run((response_tensors, status_code, status_message))) + response_message = test_example_pb2.TestCase() + self.assertTrue(response_message.ParseFromString(response_values)) + self.assertAllEqual([2, 3, 4], response_message.shape) + # For the base Rpc op, don't expect to get error status back. 
+ self.assertEqual(errors.OK, status_code_values) + self.assertEqual(b'', status_message_values) + + def testEmptyHostPortRpc(self): + with self.test_session() as sess: + request_tensors = [] + response_tensors = self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) + self.assertAllEqual(response_tensors.shape, [0]) + response_values = sess.run(response_tensors) + self.assertAllEqual(response_values.shape, [0]) + + def testInvalidAddresses(self): + with self.test_session() as sess: + with self.assertRaisesOpError(self.invalid_method_string): + sess.run( + self.rpc( + method='/InvalidService.IncrementTestShapes', + address=self._address, + request='')) + + with self.assertRaisesOpError(self.invalid_method_string): + sess.run( + self.rpc( + method=self.get_method_name('InvalidMethodName'), + address=self._address, + request='')) + + # This also covers the case of address='' + # and address='localhost:293874293874' + with self.assertRaises(errors.UnavailableError): + sess.run( + self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address='unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@', + request='')) + + # Test invalid method with the TryRpc op + _, status_code_value, status_message_value = sess.run( + self.try_rpc( + method=self.get_method_name('InvalidMethodName'), + address=self._address, + request='')) + self.assertEqual(errors.UNIMPLEMENTED, status_code_value) + self.assertTrue( + self.invalid_method_string in status_message_value.decode('ascii')) + + def testAlwaysFailingMethod(self): + with self.test_session() as sess: + response_tensors = self.rpc( + method=self.get_method_name('AlwaysFailWithInvalidArgument'), + address=self._address, + request='') + self.assertEqual(response_tensors.shape, ()) + with self.assertRaisesOpError(I_WARNED_YOU): + sess.run(response_tensors) + + def testSometimesFailingMethodWithManyRequests(self): + with self.test_session() as sess: + # Fail hard by 
default. + response_tensors = self.rpc( + method=self.get_method_name('SometimesFailWithInvalidArgument'), + address=self._address, + request=[''] * 20) + self.assertEqual(response_tensors.shape, (20,)) + with self.assertRaisesOpError(I_WARNED_YOU): + sess.run(response_tensors) + + # Don't fail hard, use TryRpc - return the failing status instead. + response_tensors, status_code, status_message = self.try_rpc( + method=self.get_method_name('SometimesFailWithInvalidArgument'), + address=self._address, + request=[''] * 20) + self.assertEqual(response_tensors.shape, (20,)) + self.assertEqual(status_code.shape, (20,)) + self.assertEqual(status_message.shape, (20,)) + status_code_values, status_message_values = sess.run((status_code, + status_message)) + self.assertTrue(all( # A bare list is truthy whenever it is non-empty. + x in (errors.OK, errors.INVALID_ARGUMENT) for x in status_code_values + )) + expected_message_values = np.where( + status_code_values == errors.INVALID_ARGUMENT, + I_WARNED_YOU.encode('ascii'), b'') + self.assertAllEqual(expected_message_values, status_message_values) + + def testVecHostPortRpc(self): + with self.test_session() as sess: + request_tensors = [ + test_example_pb2.TestCase( + shape=[i, i + 1, i + 2]).SerializeToString() for i in range(20) + ] + response_tensors = self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) + self.assertEqual(response_tensors.shape, (20,)) + response_values = sess.run(response_tensors) + self.assertEqual(response_values.shape, (20,)) + for i in range(20): + response_message = test_example_pb2.TestCase() + self.assertTrue(response_message.ParseFromString(response_values[i])) + self.assertAllEqual([i + 1, i + 2, i + 3], response_message.shape) + + def testVecHostPortManyParallelRpcs(self): + with self.test_session() as sess: + request_tensors = [ + test_example_pb2.TestCase( + shape=[i, i + 1, i + 2]).SerializeToString() for i in range(20) + ] + many_response_tensors = [ + self.rpc( +
method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) for _ in range(10) + ] + # Launch parallel 10 calls to the RpcOp, each containing + # 20 rpc requests. + many_response_values = sess.run(many_response_tensors) + self.assertEqual(10, len(many_response_values)) + for response_values in many_response_values: + self.assertEqual(response_values.shape, (20,)) + for i in range(20): + response_message = test_example_pb2.TestCase() + self.assertTrue(response_message.ParseFromString(response_values[i])) + self.assertAllEqual([i + 1, i + 2, i + 3], response_message.shape) + + def testVecHostPortRpcUsingEncodeAndDecodeProto(self): + with self.test_session() as sess: + request_tensors = encode_proto_op.encode_proto( + message_type='tensorflow.contrib.rpc.TestCase', + field_names=['shape'], + sizes=[[3]] * 20, + values=[ + [[i, i + 1, i + 2] for i in range(20)], + ]) + response_tensor_strings = self.rpc( + method=self.get_method_name('IncrementTestShapes'), + address=self._address, + request=request_tensors) + _, (response_shape,) = decode_proto_op.decode_proto( + bytes=response_tensor_strings, + message_type='tensorflow.contrib.rpc.TestCase', + field_names=['shape'], + output_types=[dtypes.int32]) + response_shape_values = sess.run(response_shape) + self.assertAllEqual([[i + 1, i + 2, i + 3] + for i in range(20)], response_shape_values) + + def testVecHostPortRpcCancelsUponSessionTimeOutWhenSleepingForever(self): + with self.test_session() as sess: + request_tensors = [''] * 25 # This will launch 25 RPC requests. 
+ response_tensors = self.rpc( + method=self.get_method_name('SleepForever'), + address=self._address, + request=request_tensors) + for timeout_ms in [1, 500, 1000]: + options = config_pb2.RunOptions(timeout_in_ms=timeout_ms) + with self.assertRaises((errors.UnavailableError, + errors.DeadlineExceededError)): + sess.run(response_tensors, options=options) + + def testVecHostPortRpcCancelsUponConfiguredTimeOutWhenSleepingForever(self): + with self.test_session() as sess: + request_tensors = [''] * 25 # This will launch 25 RPC requests. + response_tensors = self.rpc( + method=self.get_method_name('SleepForever'), + address=self._address, + timeout_in_ms=1000, + request=request_tensors) + with self.assertRaises(errors.DeadlineExceededError): + sess.run(response_tensors) + + def testTryRpcPropagatesDeadlineErrorWithSometimesTimingOutRequests(self): + with self.test_session() as sess: + response_tensors, status_code, status_message = self.try_rpc( + method=self.get_method_name('SometimesSleepForever'), + timeout_in_ms=1000, + address=self._address, + request=[''] * 20) + self.assertEqual(response_tensors.shape, (20,)) + self.assertEqual(status_code.shape, (20,)) + self.assertEqual(status_message.shape, (20,)) + status_code_values = sess.run(status_code) + self.assertTrue(all( # A bare list is truthy whenever it is non-empty. + x in (errors.OK, errors.DEADLINE_EXCEEDED) for x in status_code_values + )) + + def testTryRpcWithMultipleAddressesSingleRequest(self): + flatten = lambda x: list(itertools.chain.from_iterable(x)) + with self.test_session() as sess: + addresses = flatten([[ + self._address, 'unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@' + ] for _ in range(10)]) + request = test_example_pb2.TestCase(shape=[0, 1, 2]).SerializeToString() + response_tensors, status_code, _ = self.try_rpc( + method=self.get_method_name('IncrementTestShapes'), + address=addresses, + request=request) + response_tensors_values, status_code_values = sess.run((response_tensors, + status_code)) + self.assertAllEqual( +
flatten([errors.OK, errors.UNAVAILABLE] for _ in range(10)), + status_code_values) + for i in range(10): + self.assertTrue(response_tensors_values[2 * i]) + self.assertFalse(response_tensors_values[2 * i + 1]) + + def testTryRpcWithMultipleMethodsSingleRequest(self): + flatten = lambda x: list(itertools.chain.from_iterable(x)) + with self.test_session() as sess: + methods = flatten( + [[self.get_method_name('IncrementTestShapes'), 'InvalidMethodName'] + for _ in range(10)]) + request = test_example_pb2.TestCase(shape=[0, 1, 2]).SerializeToString() + response_tensors, status_code, _ = self.try_rpc( + method=methods, address=self._address, request=request) + response_tensors_values, status_code_values = sess.run((response_tensors, + status_code)) + self.assertAllEqual( + flatten([errors.OK, errors.UNIMPLEMENTED] for _ in range(10)), + status_code_values) + for i in range(10): + self.assertTrue(response_tensors_values[2 * i]) + self.assertFalse(response_tensors_values[2 * i + 1]) + + def testTryRpcWithMultipleAddressesAndRequests(self): + flatten = lambda x: list(itertools.chain.from_iterable(x)) + with self.test_session() as sess: + addresses = flatten([[ + self._address, 'unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@' + ] for _ in range(10)]) + requests = [ + test_example_pb2.TestCase( + shape=[i, i + 1, i + 2]).SerializeToString() for i in range(20) + ] + response_tensors, status_code, _ = self.try_rpc( + method=self.get_method_name('IncrementTestShapes'), + address=addresses, + request=requests) + response_tensors_values, status_code_values = sess.run((response_tensors, + status_code)) + self.assertAllEqual( + flatten([errors.OK, errors.UNAVAILABLE] for _ in range(10)), + status_code_values) + for i in range(20): + if i % 2 == 1: + self.assertFalse(response_tensors_values[i]) + else: + response_message = test_example_pb2.TestCase() + self.assertTrue( + response_message.ParseFromString(response_tensors_values[i])) + self.assertAllEqual([i + 1, i + 2, i + 3], 
response_message.shape) diff --git a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_servicer.py b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_servicer.py new file mode 100644 index 0000000000..7cbd636cb1 --- /dev/null +++ b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_servicer.py @@ -0,0 +1,101 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""Test servicer for RpcOp tests.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import random +import time + +import grpc + +from tensorflow.contrib.rpc.python.kernel_tests import rpc_op_test_base +from tensorflow.contrib.rpc.python.kernel_tests import test_example_pb2_grpc + + +class RpcOpTestServicer(test_example_pb2_grpc.TestCaseServiceServicer): + """Test servicer for RpcOp tests.""" + + def IncrementTestShapes(self, request, context): + """Increment the entries in the shape attribute of request. + + Args: + request: input TestCase. + context: the rpc context. + + Returns: + output TestCase. + """ + for i in range(len(request.shape)): + request.shape[i] += 1 + return request + + def AlwaysFailWithInvalidArgument(self, request, context): + """Always fails with an InvalidArgument status. + + Args: + request: input TestCase. + context: the rpc context. + + Returns: + output TestCase. 
+ """ + del request + context.set_code(grpc.StatusCode.INVALID_ARGUMENT) + context.set_details(rpc_op_test_base.I_WARNED_YOU) + + def SometimesFailWithInvalidArgument(self, request, context): + """Sometimes fails with an InvalidArgument status. + + Args: + request: input TestCase. + context: the rpc context. + + Returns: + output TestCase. + """ + if random.randint(0, 1) == 1: + context.set_code(grpc.StatusCode.INVALID_ARGUMENT) + context.set_details(rpc_op_test_base.I_WARNED_YOU) + return request + + def SleepForever(self, request, context): + """Sleeps forever. + + Args: + request: input TestCase. + context: the rpc context. + + Returns: + output TestCase. + """ + # TODO(ebrevdo): Make this async wait like the stubby version. + time.sleep(5) + + def SometimesSleepForever(self, request, context): + """Sometimes sleeps forever. + + Args: + request: input TestCase. + context: the rpc context. + + Returns: + output TestCase. + """ + if random.randint(0, 1) == 1: + time.sleep(5) + return request diff --git a/tensorflow/contrib/rpc/python/kernel_tests/test_example.proto b/tensorflow/contrib/rpc/python/kernel_tests/test_example.proto new file mode 100644 index 0000000000..96f4550f62 --- /dev/null +++ b/tensorflow/contrib/rpc/python/kernel_tests/test_example.proto @@ -0,0 +1,171 @@ +// Test description and protos to work with it. +// +// Many of the protos in this file are for unit tests that haven't been written yet. + +syntax = "proto2"; + +import "tensorflow/core/framework/types.proto"; + +package tensorflow.contrib.rpc; + +// A TestCase holds a proto and a bunch of assertions +// about how it should decode. +message TestCase { + // A batch of primitives to be serialized and decoded. + repeated RepeatedPrimitiveValue primitive = 1; + // The shape of the batch. + repeated int32 shape = 2; + // Expected sizes for each field. + repeated int32 sizes = 3; + // Expected values for each field. 
+ repeated FieldSpec field = 4; +}; + +service TestCaseService { + // Copy input, and increment each entry in 'shape' by 1. + rpc IncrementTestShapes(TestCase) returns (TestCase) { + } + + // Sleep forever. + rpc SleepForever(TestCase) returns (TestCase) { + } + + // Sleep forever 50% of the time, return immediately the other 50%. + rpc SometimesSleepForever(TestCase) returns (TestCase) { + } + + // Always fails with InvalidArgument. + rpc AlwaysFailWithInvalidArgument(TestCase) returns (TestCase) { + } + + // Fails with InvalidArgument 50% of the time. + rpc SometimesFailWithInvalidArgument(TestCase) returns (TestCase) { + } +}; + +// FieldSpec describes the expected output for a single field. +message FieldSpec { + optional string name = 1; + optional tensorflow.DataType dtype = 2; + optional RepeatedPrimitiveValue expected = 3; +}; + +message TestValue { + optional PrimitiveValue primitive_value = 1; + optional EnumValue enum_value = 2; + optional MessageValue message_value = 3; + optional RepeatedMessageValue repeated_message_value = 4; + optional RepeatedPrimitiveValue repeated_primitive_value = 6; +} + +message PrimitiveValue { + optional double double_value = 1; + optional float float_value = 2; + optional int64 int64_value = 3; + optional uint64 uint64_value = 4; + optional int32 int32_value = 5; + optional fixed64 fixed64_value = 6; + optional fixed32 fixed32_value = 7; + optional bool bool_value = 8; + optional string string_value = 9; + optional bytes bytes_value = 12; + optional uint32 uint32_value = 13; + optional sfixed32 sfixed32_value = 15; + optional sfixed64 sfixed64_value = 16; + optional sint32 sint32_value = 17; + optional sint64 sint64_value = 18; +} + +// NOTE: This definition must be kept in sync with PackedPrimitiveValue. 
+message RepeatedPrimitiveValue { + repeated double double_value = 1; + repeated float float_value = 2; + repeated int64 int64_value = 3; + repeated uint64 uint64_value = 4; + repeated int32 int32_value = 5; + repeated fixed64 fixed64_value = 6; + repeated fixed32 fixed32_value = 7; + repeated bool bool_value = 8; + repeated string string_value = 9; + repeated bytes bytes_value = 12; + repeated uint32 uint32_value = 13; + repeated sfixed32 sfixed32_value = 15; + repeated sfixed64 sfixed64_value = 16; + repeated sint32 sint32_value = 17; + repeated sint64 sint64_value = 18; + repeated PrimitiveValue message_value = 19; +} + +// A PackedPrimitiveValue looks exactly the same as a RepeatedPrimitiveValue +// in the text format, but the binary serialization is different. +// We test the packed representations by loading the same test cases +// using this definition instead of RepeatedPrimitiveValue. +// NOTE: This definition must be kept in sync with RepeatedPrimitiveValue +// in every way except the packed=true declaration. 
+message PackedPrimitiveValue { + repeated double double_value = 1 [packed = true]; + repeated float float_value = 2 [packed = true]; + repeated int64 int64_value = 3 [packed = true]; + repeated uint64 uint64_value = 4 [packed = true]; + repeated int32 int32_value = 5 [packed = true]; + repeated fixed64 fixed64_value = 6 [packed = true]; + repeated fixed32 fixed32_value = 7 [packed = true]; + repeated bool bool_value = 8 [packed = true]; + repeated string string_value = 9; + repeated bytes bytes_value = 12; + repeated uint32 uint32_value = 13 [packed = true]; + repeated sfixed32 sfixed32_value = 15 [packed = true]; + repeated sfixed64 sfixed64_value = 16 [packed = true]; + repeated sint32 sint32_value = 17 [packed = true]; + repeated sint64 sint64_value = 18 [packed = true]; + repeated PrimitiveValue message_value = 19; +} + +message EnumValue { + enum Color { + RED = 0; + ORANGE = 1; + YELLOW = 2; + GREEN = 3; + BLUE = 4; + INDIGO = 5; + VIOLET = 6; + }; + optional Color enum_value = 14; + repeated Color repeated_enum_value = 15; +} + + +message InnerMessageValue { + optional float float_value = 2; + repeated bytes bytes_values = 8; +} + +message MiddleMessageValue { + repeated int32 int32_values = 5; + optional InnerMessageValue message_value = 11; + optional uint32 uint32_value = 13; +} + +message MessageValue { + optional double double_value = 1; + optional MiddleMessageValue message_value = 11; +} + +message RepeatedMessageValue { + message NestedMessageValue { + optional float float_value = 2; + repeated bytes bytes_values = 8; + } + + repeated NestedMessageValue message_values = 11; +} + +// Message containing fields with field numbers higher than any field above. An +// instance of this message is prepended to each binary message in the test to +// exercise the code path that handles fields encoded out of order of field +// number. 
+message ExtraFields { + optional string string_value = 1776; + optional bool bool_value = 1777; +} diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index e01e076bcf..a43f5745c0 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -1,7 +1,6 @@ # Platform-specific build configurations. load("@protobuf_archive//:protobuf.bzl", "proto_gen") -load("@protobuf_archive//:protobuf.bzl", "py_proto_library") load("//tensorflow:tensorflow.bzl", "if_not_mobile") load("//tensorflow:tensorflow.bzl", "if_windows") load("//tensorflow:tensorflow.bzl", "if_not_windows") @@ -110,6 +109,12 @@ def _proto_cc_srcs(srcs, use_grpc_plugin=False): ret += [s[:-len(".proto")] + ".grpc.pb.cc" for s in srcs] return ret +def _proto_py_outs(srcs, use_grpc_plugin=False): + ret = [s[:-len(".proto")] + "_pb2.py" for s in srcs] + if use_grpc_plugin: + ret += [s[:-len(".proto")] + "_pb2_grpc.py" for s in srcs] + return ret + # Re-defined protocol buffer rule to allow building "header only" protocol # buffers, to avoid duplicate registrations. Also allows non-iterable cc_libs # containing select() statements. @@ -212,6 +217,80 @@ def cc_proto_library( hdrs=gen_hdrs, **kargs) +# Re-defined protocol buffer rule to bring in the change introduced in commit +# https://github.com/google/protobuf/commit/294b5758c373cbab4b72f35f4cb62dc1d8332b68 +# which was not part of a stable protobuf release in 04/2018. +# TODO(jsimsa): Remove this once the protobuf dependency version is updated +# to include the above commit. 
+def py_proto_library( + name, + srcs=[], + deps=[], + py_libs=[], + py_extra_srcs=[], + include=None, + default_runtime="@protobuf_archive//:protobuf_python", + protoc="@protobuf_archive//:protoc", + use_grpc_plugin=False, + **kargs): + """Bazel rule to create a Python protobuf library from proto source files + + NOTE: the rule is only an internal workaround to generate protos. The + interface may change and the rule may be removed when bazel has introduced + the native rule. + + Args: + name: the name of the py_proto_library. + srcs: the .proto files of the py_proto_library. + deps: a list of dependency labels; must be py_proto_library. + py_libs: a list of other py_library targets depended on by the generated + py_library. + py_extra_srcs: extra source files that will be added to the output + py_library. This attribute is used for internal bootstrapping. + include: a string indicating the include path of the .proto files. + default_runtime: the implicit default runtime which will be depended on by + the generated py_library target. + protoc: the label of the protocol compiler to generate the sources. + use_grpc_plugin: a flag to indicate whether to call the gRPC Python plugin + when processing the proto files. + **kargs: other keyword arguments that are passed to py_library. + """ + outs = _proto_py_outs(srcs, use_grpc_plugin) + + includes = [] + if include != None: + includes = [include] + + grpc_python_plugin = None + if use_grpc_plugin: + grpc_python_plugin = "//external:grpc_python_plugin" + # Note: Generated grpc code depends on Python grpc module. This dependency + # is not explicitly listed in py_libs. Instead, host system is assumed to + # have grpc installed. 
+ + proto_gen( + name=name + "_genproto", + srcs=srcs, + deps=[s + "_genproto" for s in deps], + includes=includes, + protoc=protoc, + gen_py=1, + outs=outs, + visibility=["//visibility:public"], + plugin=grpc_python_plugin, + plugin_language="grpc" + ) + + if default_runtime and not default_runtime in py_libs + deps: + py_libs = py_libs + [default_runtime] + + native.py_library( + name=name, + srcs=outs+py_extra_srcs, + deps=py_libs+deps, + imports=includes, + **kargs) + def tf_proto_library_cc(name, srcs = [], has_services = None, protodeps = [], visibility = [], testonly = 0, @@ -256,8 +335,7 @@ def tf_proto_library_cc(name, srcs = [], has_services = None, ) def tf_proto_library_py(name, srcs=[], protodeps=[], deps=[], visibility=[], - testonly=0, - srcs_version="PY2AND3"): + testonly=0, srcs_version="PY2AND3", use_grpc_plugin=False): py_proto_library( name = name + "_py", srcs = srcs, @@ -267,6 +345,7 @@ def tf_proto_library_py(name, srcs=[], protodeps=[], deps=[], visibility=[], default_runtime = "@protobuf_archive//:protobuf_python", visibility = visibility, testonly = testonly, + use_grpc_plugin = use_grpc_plugin, ) def tf_jspb_proto_library(**kwargs): @@ -305,6 +384,7 @@ def tf_proto_library(name, srcs = [], has_services = None, srcs_version = "PY2AND3", testonly = testonly, visibility = visibility, + use_grpc_plugin = has_services, ) def tf_additional_lib_hdrs(exclude = []): diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index a0bae23a7c..2ef105755f 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -76,6 +76,7 @@ COMMON_PIP_DEPS = [ "//tensorflow/contrib/predictor:predictor_pip", "//tensorflow/contrib/proto:proto_pip", "//tensorflow/contrib/receptive_field:receptive_field_pip", + "//tensorflow/contrib/rpc:rpc_pip", "//tensorflow/contrib/session_bundle:session_bundle_pip", "//tensorflow/contrib/signal:signal_py", "//tensorflow/contrib/signal:test_util", diff --git 
a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 018a395063..48728ac131 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -752,6 +752,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""): name = "grpc_cpp_plugin", actual = "@grpc//:grpc_cpp_plugin", ) + native.bind( + name = "grpc_python_plugin", + actual = "@grpc//:grpc_python_plugin", + ) # gRPC has three empty C++ functions which it wants the user to define # at build time. https://github.com/grpc/grpc/issues/13590 -- GitLab From 113f102164e822aa15d1e875287009fef9d8b823 Mon Sep 17 00:00:00 2001 From: Younghee Kwon Date: Mon, 16 Apr 2018 12:56:14 -0700 Subject: [PATCH 028/434] boosted_trees: Make some regularizer/hyper-params as inputs instead of attributes. PiperOrigin-RevId: 193085059 --- ...tedTreesCalculateBestGainsPerFeature.pbtxt | 38 +++++++-------- .../api_def_BoostedTreesPredict.pbtxt | 6 --- .../api_def_BoostedTreesTrainingPredict.pbtxt | 6 --- .../api_def_BoostedTreesUpdateEnsemble.pbtxt | 4 +- .../kernels/boosted_trees/prediction_ops.cc | 16 +++---- .../core/kernels/boosted_trees/stats_ops.cc | 44 ++++++++++-------- .../kernels/boosted_trees/training_ops.cc | 19 ++++---- tensorflow/core/ops/boosted_trees_ops.cc | 36 +++++---------- .../core/ops/compat/ops_history.v1.pbtxt | 46 +++++++------------ .../python/estimator/canned/boosted_trees.py | 6 +-- .../boosted_trees/prediction_ops_test.py | 14 +----- 11 files changed, 96 insertions(+), 139 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt index 62876a293c..7f18c64574 100644 --- a/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt @@ -11,6 +11,24 @@ END name: "stats_summary_list" description: <GetAttr("max_depth", &max_depth_)); } void 
Compute(OpKernelContext* const context) override { @@ -155,9 +154,10 @@ class BoostedTreesTrainingPredictOp : public OpKernel { output_partial_logits(i, 0) = partial_all_logit; } }; - // Assume we will not go over more than one full tree. 4 is a magic - // number. - const int64 cost = 4 * max_depth_; + // 30 is the magic number. The actual value might be a function of (the + // number of layers) * (cpu cycles spent on each layer), but this value + // would work for many cases. May be tuned later. + const int64 cost = 30; thread::ThreadPool* const worker_threads = context->device()->tensorflow_cpu_worker_threads()->workers; Shard(worker_threads->NumThreads(), worker_threads, batch_size, @@ -168,7 +168,6 @@ class BoostedTreesTrainingPredictOp : public OpKernel { private: int32 logits_dimension_; // the size of the output prediction vector. int32 num_bucketized_features_; // Indicates the number of features. - int32 max_depth_; }; REGISTER_KERNEL_BUILDER(Name("BoostedTreesTrainingPredict").Device(DEVICE_CPU), @@ -186,7 +185,6 @@ class BoostedTreesPredictOp : public OpKernel { OP_REQUIRES(context, logits_dimension_ == 1, errors::InvalidArgument( "Currently only one dimensional outputs are supported.")); - OP_REQUIRES_OK(context, context->GetAttr("max_depth", &max_depth_)); } void Compute(OpKernelContext* const context) override { @@ -243,7 +241,10 @@ class BoostedTreesPredictOp : public OpKernel { output_logits(i, 0) = tree_logit; } }; - const int64 cost = (latest_tree + 1) * max_depth_; + // 10 is the magic number. The actual number might depend on (the number of + // layers in the trees) and (cpu cycles spent on each layer), but this + // value would work for many cases. May be tuned later. 
+ const int64 cost = (latest_tree + 1) * 10; thread::ThreadPool* const worker_threads = context->device()->tensorflow_cpu_worker_threads()->workers; Shard(worker_threads->NumThreads(), worker_threads, batch_size, @@ -254,7 +255,6 @@ class BoostedTreesPredictOp : public OpKernel { int32 logits_dimension_; // Indicates the size of the output prediction vector. int32 num_bucketized_features_; // Indicates the number of features. - int32 max_depth_; }; REGISTER_KERNEL_BUILDER(Name("BoostedTreesPredict").Device(DEVICE_CPU), diff --git a/tensorflow/core/kernels/boosted_trees/stats_ops.cc b/tensorflow/core/kernels/boosted_trees/stats_ops.cc index 16e65cf284..40f50333d3 100644 --- a/tensorflow/core/kernels/boosted_trees/stats_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/stats_ops.cc @@ -29,10 +29,6 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { explicit BoostedTreesCalculateBestGainsPerFeatureOp( OpKernelConstruction* const context) : OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("l1", &l1_)); - OP_REQUIRES_OK(context, context->GetAttr("l2", &l2_)); - OP_REQUIRES_OK(context, - context->GetAttr("tree_complexity", &tree_complexity_)); OP_REQUIRES_OK(context, context->GetAttr("max_splits", &max_splits_)); OP_REQUIRES_OK(context, context->GetAttr("num_features", &num_features_)); } @@ -54,6 +50,16 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { for (const auto& tensor : stats_summary_list) { stats_summary.emplace_back(tensor.tensor()); } + const Tensor* l1_t; + OP_REQUIRES_OK(context, context->input("l1", &l1_t)); + const auto l1 = l1_t->scalar()(); + const Tensor* l2_t; + OP_REQUIRES_OK(context, context->input("l2", &l2_t)); + const auto l2 = l2_t->scalar()(); + const Tensor* tree_complexity_t; + OP_REQUIRES_OK(context, + context->input("tree_complexity", &tree_complexity_t)); + const auto tree_complexity = tree_complexity_t->scalar()(); // Allocate output lists of tensors: OpOutputList 
output_node_ids_list; @@ -106,7 +112,8 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { // Parent gain. float parent_gain; float unused; - CalculateWeightsAndGains(total_grad, total_hess, &unused, &parent_gain); + CalculateWeightsAndGains(total_grad, total_hess, l1, l2, &unused, + &parent_gain); for (int bucket = 0; bucket < num_buckets; ++bucket) { const float cum_grad_bucket = cum_grad[bucket]; @@ -114,13 +121,13 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { // Left child. float contrib_for_left; float gain_for_left; - CalculateWeightsAndGains(cum_grad_bucket, cum_hess_bucket, + CalculateWeightsAndGains(cum_grad_bucket, cum_hess_bucket, l1, l2, &contrib_for_left, &gain_for_left); // Right child. float contrib_for_right; float gain_for_right; CalculateWeightsAndGains(total_grad - cum_grad_bucket, - total_hess - cum_hess_bucket, + total_hess - cum_hess_bucket, l1, l2, &contrib_for_right, &gain_for_right); if (gain_for_left + gain_for_right > best_gain) { @@ -173,7 +180,7 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { for (int i = 0; i < num_nodes; ++i) { output_node_ids_vec(i) = output_node_ids[i]; // Adjust the gains to penalize by tree complexity. - output_gains_vec(i) = output_gains[i] - tree_complexity_; + output_gains_vec(i) = output_gains[i] - tree_complexity; output_thresholds_vec(i) = output_thresholds[i]; // Logits are 1-dimensional for now. // TODO(nponomareva): Consider multi-dimensional logits. @@ -184,8 +191,8 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { } private: - void CalculateWeightsAndGains(const float g, const float h, float* weight, - float* gain) { + void CalculateWeightsAndGains(const float g, const float h, const float l1, + const float l2, float* weight, float* gain) { // // The formula for weight is -(g+l1*sgn(w))/(H+l2), for gain it is // (g+l1*sgn(w))^2/(h+l2). 
@@ -196,11 +203,11 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { // 1) Assume w>0 => w=-(g+l1)/(h+l2)=> g+l1 < 0 => g < -l1 // 2) Assume w<0 => w=-(g-l1)/(h+l2)=> g-l1 > 0 => g > l1 // For g from (-l1, l1), thus there is no solution => set to 0. - if (l1_ > 0) { - if (g > l1_) { - g_with_l1 -= l1_; - } else if (g < -l1_) { - g_with_l1 += l1_; + if (l1 > 0) { + if (g > l1) { + g_with_l1 -= l1; + } else if (g < -l1) { + g_with_l1 += l1; } else { *weight = 0.0; *gain = 0.0; @@ -208,19 +215,16 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { } } // Apply L2 regularization. - if (h + l2_ <= kEps) { + if (h + l2 <= kEps) { // Avoid division by 0 or infinitesimal. *weight = 0; *gain = 0; } else { - *weight = -g_with_l1 / (h + l2_); + *weight = -g_with_l1 / (h + l2); *gain = -g_with_l1 * (*weight); } } - float l1_; - float l2_; - float tree_complexity_; int max_splits_; int num_features_; }; diff --git a/tensorflow/core/kernels/boosted_trees/training_ops.cc b/tensorflow/core/kernels/boosted_trees/training_ops.cc index 67cac14c52..a14fd4a133 100644 --- a/tensorflow/core/kernels/boosted_trees/training_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/training_ops.cc @@ -43,8 +43,6 @@ class BoostedTreesUpdateEnsembleOp : public OpKernel { public: explicit BoostedTreesUpdateEnsembleOp(OpKernelConstruction* const context) : OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("max_depth", &max_depth_)); - OP_REQUIRES_OK(context, context->GetAttr("learning_rate", &learning_rate_)); OP_REQUIRES_OK(context, context->GetAttr("num_features", &num_features_)); int32 pruning_index; @@ -79,8 +77,15 @@ class BoostedTreesUpdateEnsembleOp : public OpKernel { const Tensor* feature_ids_t; OP_REQUIRES_OK(context, context->input("feature_ids", &feature_ids_t)); + const auto feature_ids = feature_ids_t->vec(); - auto feature_ids = feature_ids_t->vec(); + const Tensor* max_depth_t; + OP_REQUIRES_OK(context, context->input("max_depth", 
&max_depth_t)); + const auto max_depth = max_depth_t->scalar()(); + + const Tensor* learning_rate_t; + OP_REQUIRES_OK(context, context->input("learning_rate", &learning_rate_t)); + const auto learning_rate = learning_rate_t->scalar()(); // Find best splits for each active node. std::map best_splits; @@ -125,10 +130,10 @@ class BoostedTreesUpdateEnsembleOp : public OpKernel { // For now assume that the weights vectors are one dimensional. // TODO(nponomareva): change here for multiclass. const float left_contrib = - learning_rate_ * + learning_rate * left_node_contribs[feature_idx].matrix()(candidate_idx, 0); const float right_contrib = - learning_rate_ * + learning_rate * right_node_contribs[feature_idx].matrix()(candidate_idx, 0); // unused. @@ -145,7 +150,7 @@ class BoostedTreesUpdateEnsembleOp : public OpKernel { // Update growable tree metadata. ensemble_resource->SetNumLayersGrown(current_tree, new_num_layers); // Finalize the tree if needed. - if (ensemble_resource->GetNumLayersGrown(current_tree) >= max_depth_) { + if (ensemble_resource->GetNumLayersGrown(current_tree) >= max_depth) { // If the tree is finalized, next growing will start from node 0; node_id_start = 0; node_id_end = 1; @@ -216,8 +221,6 @@ class BoostedTreesUpdateEnsembleOp : public OpKernel { private: int32 num_features_; - float learning_rate_; - int32 max_depth_; PruningMode pruning_mode_; }; diff --git a/tensorflow/core/ops/boosted_trees_ops.cc b/tensorflow/core/ops/boosted_trees_ops.cc index 8af4903418..4d74e6d63a 100644 --- a/tensorflow/core/ops/boosted_trees_ops.cc +++ b/tensorflow/core/ops/boosted_trees_ops.cc @@ -37,9 +37,9 @@ REGISTER_OP("IsBoostedTreesEnsembleInitialized") REGISTER_OP("BoostedTreesCalculateBestGainsPerFeature") .Input("node_id_range: int32") .Input("stats_summary_list: num_features * float32") - .Attr("l1: float") - .Attr("l2: float") - .Attr("tree_complexity: float") + .Input("l1: float") + .Input("l2: float") + .Input("tree_complexity: float") .Attr("max_splits: 
int >= 1") .Attr("num_features: int >= 1") // not passed but populated automatically. .Output("node_ids_list: num_features * int32") @@ -51,19 +51,6 @@ REGISTER_OP("BoostedTreesCalculateBestGainsPerFeature") // Confirms the rank of the inputs and sets the shape of the outputs. int max_splits; int num_features; - float l1, l2, tree_complexity; - TF_RETURN_IF_ERROR(c->GetAttr("l1", &l1)); - if (l1 < 0) { - return errors::InvalidArgument("l1 must be non-negative."); - } - TF_RETURN_IF_ERROR(c->GetAttr("l2", &l2)); - if (l2 < 0) { - return errors::InvalidArgument("l2 must be non-negative."); - } - TF_RETURN_IF_ERROR(c->GetAttr("tree_complexity", &tree_complexity)); - if (tree_complexity < 0) { - return errors::InvalidArgument("Tree complexity must be non-negative."); - } TF_RETURN_IF_ERROR(c->GetAttr("max_splits", &max_splits)); TF_RETURN_IF_ERROR(c->GetAttr("num_features", &num_features)); shape_inference::ShapeHandle node_id_range_shape; @@ -83,6 +70,12 @@ REGISTER_OP("BoostedTreesCalculateBestGainsPerFeature") TF_RETURN_IF_ERROR( c->Merge(summary_shape_base, summary_shape, &unused_shape)); } + TF_RETURN_IF_ERROR( + c->WithRank(c->input(num_features + 1), 0, &unused_shape)); + TF_RETURN_IF_ERROR( + c->WithRank(c->input(num_features + 2), 0, &unused_shape)); + TF_RETURN_IF_ERROR( + c->WithRank(c->input(num_features + 3), 0, &unused_shape)); // Sets the output lists. std::vector output_shapes_vec( num_features, c->MakeShape({-1})); @@ -185,9 +178,8 @@ REGISTER_OP("BoostedTreesMakeStatsSummary") REGISTER_OP("BoostedTreesPredict") .Input("tree_ensemble_handle: resource") .Input("bucketized_features: num_bucketized_features * int32") - .Attr("num_bucketized_features: int >= 1") + .Attr("num_bucketized_features: int >= 1") // Inferred. 
.Attr("logits_dimension: int") - .Attr("max_depth: int >= 1") .Output("logits: float") .SetShapeFn([](shape_inference::InferenceContext* c) { shape_inference::ShapeHandle feature_shape; @@ -229,7 +221,6 @@ REGISTER_OP("BoostedTreesTrainingPredict") .Input("bucketized_features: num_bucketized_features * int32") .Attr("num_bucketized_features: int >= 1") .Attr("logits_dimension: int") - .Attr("max_depth: int >= 1") .Output("partial_logits: float") .Output("tree_ids: int32") .Output("node_ids: int32") @@ -239,9 +230,6 @@ REGISTER_OP("BoostedTreesTrainingPredict") TF_RETURN_IF_ERROR( c->GetAttr("num_bucketized_features", &num_bucketized_features)); - int max_depth; - TF_RETURN_IF_ERROR(c->GetAttr("max_depth", &max_depth)); - shape_inference::ShapeHandle unused_input; for (int i = 0; i < num_bucketized_features; ++i) { TF_RETURN_IF_ERROR(c->WithRank(c->input(i + 3), 1, &feature_shape)); @@ -273,8 +261,8 @@ REGISTER_OP("BoostedTreesUpdateEnsemble") .Input("thresholds: num_features * int32") .Input("left_node_contribs: num_features * float") .Input("right_node_contribs: num_features * float") - .Attr("max_depth: int >= 1") - .Attr("learning_rate: float") + .Input("max_depth: int32") + .Input("learning_rate: float") .Attr("pruning_mode: int >=0") .Attr("num_features: int >= 0") // Inferred. 
.SetShapeFn([](shape_inference::InferenceContext* c) { diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 2f6f588d2c..c627fee352 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -10735,6 +10735,18 @@ op { type: DT_FLOAT number_attr: "num_features" } + input_arg { + name: "l1" + type: DT_FLOAT + } + input_arg { + name: "l2" + type: DT_FLOAT + } + input_arg { + name: "tree_complexity" + type: DT_FLOAT + } output_arg { name: "node_ids_list" type: DT_INT32 @@ -10760,18 +10772,6 @@ op { type: DT_FLOAT number_attr: "num_features" } - attr { - name: "l1" - type: "float" - } - attr { - name: "l2" - type: "float" - } - attr { - name: "tree_complexity" - type: "float" - } attr { name: "max_splits" type: "int" @@ -10934,12 +10934,6 @@ op { name: "logits_dimension" type: "int" } - attr { - name: "max_depth" - type: "int" - has_minimum: true - minimum: 1 - } is_stateful: true } op { @@ -10999,12 +10993,6 @@ op { name: "logits_dimension" type: "int" } - attr { - name: "max_depth" - type: "int" - has_minimum: true - minimum: 1 - } is_stateful: true } op { @@ -11042,15 +11030,13 @@ op { type: DT_FLOAT number_attr: "num_features" } - attr { + input_arg { name: "max_depth" - type: "int" - has_minimum: true - minimum: 1 + type: DT_INT32 } - attr { + input_arg { name: "learning_rate" - type: "float" + type: DT_FLOAT } attr { name: "pruning_mode" diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 0ecc8c7089..d099d308f5 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -325,8 +325,7 @@ def _bt_model_fn( # so no local copy is needed; using tree_ensemble directly. 
tree_ensemble_handle=tree_ensemble.resource_handle, bucketized_features=input_feature_list, - logits_dimension=head.logits_dimension, - max_depth=tree_hparams.max_depth) + logits_dimension=head.logits_dimension) else: if is_single_machine: local_tree_ensemble = tree_ensemble @@ -361,8 +360,7 @@ def _bt_model_fn( cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=input_feature_list, - logits_dimension=head.logits_dimension, - max_depth=tree_hparams.max_depth) + logits_dimension=head.logits_dimension) logits = cached_logits + partial_logits # Create training graph. diff --git a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py index d132f15e51..54f33f3360 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py @@ -49,7 +49,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=2, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -116,7 +115,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=2, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values], @@ -189,7 +187,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=4, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -299,7 +296,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. 
predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=4, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -429,7 +425,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=2, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -562,7 +557,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=3, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -705,7 +699,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=3, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -782,7 +775,6 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # Grow tree ensemble. 
predict_op = boosted_trees_ops.training_predict( tree_ensemble_handle, - max_depth=1, cached_tree_ids=cached_tree_ids, cached_node_ids=cached_node_ids, bucketized_features=[feature_0_values, feature_1_values], @@ -905,8 +897,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): predict_op = boosted_trees_ops.predict( tree_ensemble_handle, bucketized_features=[feature_0_values, feature_1_values], - logits_dimension=1, - max_depth=2) + logits_dimension=1) logits = session.run(predict_op) self.assertAllClose(expected_logits, logits) @@ -915,8 +906,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): predict_op = boosted_trees_ops.predict( tree_ensemble_handle, bucketized_features=[feature_0_values, feature_1_values], - logits_dimension=1, - max_depth=2) + logits_dimension=1) logits = session.run(predict_op) self.assertAllClose(expected_logits, logits) -- GitLab From 91129bbb3cbc01c7ecc776048988ae83ba50e3c1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 16 Apr 2018 18:03:05 -0700 Subject: [PATCH 029/434] Adding min node weight regularization PiperOrigin-RevId: 193131300 --- .../python/estimator/boosted_trees.py | 18 +++- ...tedTreesCalculateBestGainsPerFeature.pbtxt | 8 +- .../core/kernels/boosted_trees/stats_ops.cc | 9 ++ tensorflow/core/ops/boosted_trees_ops.cc | 1 + .../core/ops/compat/ops_history.v1.pbtxt | 4 + .../python/estimator/canned/boosted_trees.py | 85 ++++++++++--------- .../estimator/canned/boosted_trees_test.py | 3 +- .../boosted_trees/stats_ops_test.py | 51 +++++++++++ ....estimator.-boosted-trees-classifier.pbtxt | 2 +- ...w.estimator.-boosted-trees-regressor.pbtxt | 2 +- 10 files changed, 138 insertions(+), 45 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py index 314c54ed00..00356ce0ca 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py +++ 
b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -36,6 +36,7 @@ class _BoostedTreesEstimator(estimator.Estimator): l1_regularization=0., l2_regularization=0., tree_complexity=0., + min_node_weight=0., config=None): """Initializes a `BoostedTreesEstimator` instance. @@ -65,13 +66,16 @@ class _BoostedTreesEstimator(estimator.Estimator): l2_regularization: regularization multiplier applied to the square weights of the tree leafs. tree_complexity: regularization factor to penalize trees with more leaves. + min_node_weight: minimum hessian a node must have for a split to be + considered. The value will be compared with sum(leaf_hessian)/ + (batch_size * n_batches_per_layer). config: `RunConfig` object to configure the runtime settings. """ # pylint:disable=protected-access # HParams for the model. tree_hparams = canned_boosted_trees._TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity) + tree_complexity, min_node_weight) def _model_fn(features, labels, mode, config): return canned_boosted_trees._bt_model_fn( @@ -96,6 +100,7 @@ def boosted_trees_classifier_train_in_memory( l1_regularization=0., l2_regularization=0., tree_complexity=0., + min_node_weight=0., config=None, train_hooks=None): """Trains a boosted tree classifier with in memory dataset. @@ -162,6 +167,9 @@ def boosted_trees_classifier_train_in_memory( l2_regularization: regularization multiplier applied to the square weights of the tree leafs. tree_complexity: regularization factor to penalize trees with more leaves. + min_node_weight: minimum hessian a node must have for a split to be + considered. The value will be compared with sum(leaf_hessian)/ + (batch_size * n_batches_per_layer). config: `RunConfig` object to configure the runtime settings. train_hooks: a list of Hook instances to be passed to estimator.train(). @@ -184,7 +192,7 @@ def boosted_trees_classifier_train_in_memory( # HParams for the model. 
tree_hparams = canned_boosted_trees._TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity) + tree_complexity, min_node_weight) def _model_fn(features, labels, mode, config): return canned_boosted_trees._bt_model_fn( @@ -220,6 +228,7 @@ def boosted_trees_regressor_train_in_memory( l1_regularization=0., l2_regularization=0., tree_complexity=0., + min_node_weight=0., config=None, train_hooks=None): """Trains a boosted tree regressor with in memory dataset. @@ -279,6 +288,9 @@ def boosted_trees_regressor_train_in_memory( l2_regularization: regularization multiplier applied to the square weights of the tree leafs. tree_complexity: regularization factor to penalize trees with more leaves. + min_node_weight: minimum hessian a node must have for a split to be + considered. The value will be compared with sum(leaf_hessian)/ + (batch_size * n_batches_per_layer). config: `RunConfig` object to configure the runtime settings. train_hooks: a list of Hook instances to be passed to estimator.train(). @@ -300,7 +312,7 @@ def boosted_trees_regressor_train_in_memory( # HParams for the model. 
tree_hparams = canned_boosted_trees._TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity) + tree_complexity, min_node_weight) def _model_fn(features, labels, mode, config): return canned_boosted_trees._bt_model_fn( diff --git a/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt index 7f18c64574..3f181e91ce 100644 --- a/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt @@ -29,6 +29,12 @@ END name: "tree_complexity" description: <input("tree_complexity", &tree_complexity_t)); const auto tree_complexity = tree_complexity_t->scalar<float>()(); + const Tensor* min_node_weight_t; + OP_REQUIRES_OK(context, + context->input("min_node_weight", &min_node_weight_t)); + const auto min_node_weight = min_node_weight_t->scalar<float>()(); // Allocate output lists of tensors: OpOutputList output_node_ids_list; @@ -105,6 +109,11 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { cum_grad.push_back(total_grad); cum_hess.push_back(total_hess); } + // Check if node has enough of average hessian. + if (total_hess < min_node_weight) { + // Do not split the node because not enough avg hessian.
+ continue; + } float best_gain = std::numeric_limits<float>::lowest(); float best_bucket = 0; float best_contrib_for_left = 0.0; diff --git a/tensorflow/core/ops/boosted_trees_ops.cc b/tensorflow/core/ops/boosted_trees_ops.cc index 4d74e6d63a..88d6eaf819 100644 --- a/tensorflow/core/ops/boosted_trees_ops.cc +++ b/tensorflow/core/ops/boosted_trees_ops.cc @@ -40,6 +40,7 @@ REGISTER_OP("BoostedTreesCalculateBestGainsPerFeature") .Input("l1: float") .Input("l2: float") .Input("tree_complexity: float") + .Input("min_node_weight: float") .Attr("max_splits: int >= 1") .Attr("num_features: int >= 1") // not passed but populated automatically. .Output("node_ids_list: num_features * int32") diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index c627fee352..4a24c44d69 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -10747,6 +10747,10 @@ op { name: "tree_complexity" type: DT_FLOAT } + input_arg { + name: "min_node_weight" + type: DT_FLOAT + } output_arg { name: "node_ids_list" type: DT_INT32 diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index d099d308f5..536bd2bf81 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -40,9 +40,11 @@ from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util from tensorflow.python.util.tf_export import tf_export -_TreeHParams = collections.namedtuple( - 'TreeHParams', - ['n_trees', 'max_depth', 'learning_rate', 'l1', 'l2', 'tree_complexity']) +# TODO(nponomareva): Reveal pruning params here.
+_TreeHParams = collections.namedtuple('TreeHParams', [ + 'n_trees', 'max_depth', 'learning_rate', 'l1', 'l2', 'tree_complexity', + 'min_node_weight' +]) _HOLD_FOR_MULTI_CLASS_SUPPORT = object() _HOLD_FOR_MULTI_DIM_SUPPORT = object() @@ -397,6 +399,7 @@ def _bt_model_fn( l1=tree_hparams.l1, l2=tree_hparams.l2, tree_complexity=tree_hparams.tree_complexity, + min_node_weight=tree_hparams.min_node_weight, max_splits=max_splits)) grow_op = boosted_trees_ops.update_ensemble( # Confirm if local_tree_ensemble or tree_ensemble should be used. @@ -515,21 +518,21 @@ def _create_regression_head(label_dimension, weight_column=None): class BoostedTreesClassifier(estimator.Estimator): """A Classifier for Tensorflow Boosted Trees models.""" - def __init__( - self, - feature_columns, - n_batches_per_layer, - model_dir=None, - n_classes=_HOLD_FOR_MULTI_CLASS_SUPPORT, - weight_column=None, - label_vocabulary=None, - n_trees=100, - max_depth=6, - learning_rate=0.1, - l1_regularization=0., - l2_regularization=0., - tree_complexity=0., - config=None): + def __init__(self, + feature_columns, + n_batches_per_layer, + model_dir=None, + n_classes=_HOLD_FOR_MULTI_CLASS_SUPPORT, + weight_column=None, + label_vocabulary=None, + n_trees=100, + max_depth=6, + learning_rate=0.1, + l1_regularization=0., + l2_regularization=0., + tree_complexity=0., + min_node_weight=0., + config=None): """Initializes a `BoostedTreesClassifier` instance. Example: @@ -593,6 +596,9 @@ class BoostedTreesClassifier(estimator.Estimator): l2_regularization: regularization multiplier applied to the square weights of the tree leafs. tree_complexity: regularization factor to penalize trees with more leaves. + min_node_weight: minimum hessian a node must have for a + split to be considered. The value will be compared with + sum(leaf_hessian)/(batch_size * n_batches_per_layer). config: `RunConfig` object to configure the runtime settings.
Raises: @@ -606,9 +612,9 @@ class BoostedTreesClassifier(estimator.Estimator): n_classes, weight_column, label_vocabulary=label_vocabulary) # HParams for the model. - tree_hparams = _TreeHParams( - n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity) + tree_hparams = _TreeHParams(n_trees, max_depth, learning_rate, + l1_regularization, l2_regularization, + tree_complexity, min_node_weight) def _model_fn(features, labels, mode, config): return _bt_model_fn( # pylint: disable=protected-access @@ -630,20 +636,20 @@ class BoostedTreesClassifier(estimator.Estimator): class BoostedTreesRegressor(estimator.Estimator): """A Regressor for Tensorflow Boosted Trees models.""" - def __init__( - self, - feature_columns, - n_batches_per_layer, - model_dir=None, - label_dimension=_HOLD_FOR_MULTI_DIM_SUPPORT, - weight_column=None, - n_trees=100, - max_depth=6, - learning_rate=0.1, - l1_regularization=0., - l2_regularization=0., - tree_complexity=0., - config=None): + def __init__(self, + feature_columns, + n_batches_per_layer, + model_dir=None, + label_dimension=_HOLD_FOR_MULTI_DIM_SUPPORT, + weight_column=None, + n_trees=100, + max_depth=6, + learning_rate=0.1, + l1_regularization=0., + l2_regularization=0., + tree_complexity=0., + min_node_weight=0., + config=None): """Initializes a `BoostedTreesRegressor` instance. Example: @@ -700,6 +706,9 @@ class BoostedTreesRegressor(estimator.Estimator): l2_regularization: regularization multiplier applied to the square weights of the tree leafs. tree_complexity: regularization factor to penalize trees with more leaves. + min_node_weight: minimum hessian a node must have for a + split to be considered. The value will be compared with + sum(leaf_hessian)/(batch_size * n_batches_per_layer). config: `RunConfig` object to configure the runtime settings.
Raises: @@ -712,9 +721,9 @@ class BoostedTreesRegressor(estimator.Estimator): head = _create_regression_head(label_dimension, weight_column) # HParams for the model. - tree_hparams = _TreeHParams( - n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, - tree_complexity) + tree_hparams = _TreeHParams(n_trees, max_depth, learning_rate, + l1_regularization, l2_regularization, + tree_complexity, min_node_weight) def _model_fn(features, labels, mode, config): return _bt_model_fn( # pylint: disable=protected-access diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 7823ef8410..56e67a6707 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -188,7 +188,8 @@ class ModelFnTests(test_util.TensorFlowTestCase): learning_rate=0.1, l1=0., l2=0.01, - tree_complexity=0.) + tree_complexity=0., + min_node_weight=0.) def _get_expected_ensembles_for_classification(self): first_round = """ diff --git a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py index 4d09cf94d4..f0bb84e69a 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py @@ -59,6 +59,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): l1=0.0, l2=0.0, tree_complexity=0.0, + min_node_weight=0, max_splits=max_splits) self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list)) @@ -106,6 +107,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): l1=0.0, l2=0.1, tree_complexity=0.0, + min_node_weight=0, max_splits=max_splits) self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list)) @@ -154,6 +156,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): l1=l1, l2=0.0, tree_complexity=0.0, + min_node_weight=0, max_splits=max_splits) self.assertAllEqual([[0, 1], [1, 1]], 
sess.run(thresholds_list)) @@ -205,6 +208,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase): l1=0.0, l2=l2, tree_complexity=tree_complexity, + min_node_weight=0, max_splits=max_splits) self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list)) @@ -220,6 +224,53 @@ class StatsOpsTest(test_util.TensorFlowTestCase): self.assertAllClose([[[-.424658], [-.6]], [[-.043478], [.485294]]], sess.run(right_node_contribs_list)) + def testCalculateBestGainsWithMinNodeWEight(self): + """Testing Gain calculation with min node weight.""" + with self.test_session() as sess: + max_splits = 7 + node_id_range = [1, 3] # node 1 through 2 will be processed. + stats_summary_list = [ + [ + [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored + [[0., 0.], [.15, .036], [.06, .07], [.1, .2]], # node 1 + [[0., 0.], [-.33, .68], [0., 0.], [.3, .4]], # node 2 + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 3; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 4; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 5; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 6; ignored + ], # feature 0 + [ + [[0., 0.], [0., 0.], [.08, .09], [0., 0.]], # node 0; ignored + [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]], # node 1 + [[.1, .1], [.2, .03], [-.4, .05], [.07, .08]], # node 2 + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 3; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 4; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 5; ignored + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 6; ignored + ], # feature 1 + ] # num_features * shape=[max_splits, num_buckets, 2] + + (node_ids_list, gains_list, thresholds_list, left_node_contribs_list, + right_node_contribs_list + ) = boosted_trees_ops.calculate_best_gains_per_feature( + node_id_range, + stats_summary_list, + l1=0.0, + l2=0.0, + tree_complexity=0.0, + min_node_weight=1, + max_splits=max_splits) + + # We can't split node 1 on feature 0 and node 2 on feature 1 because of + #
the min node weight. + self.assertAllEqual([[2], [1]], sess.run(node_ids_list)) + self.assertAllClose([[0.384314], [0.098013]], sess.run(gains_list)) + self.assertAllEqual([[1], [1]], sess.run(thresholds_list)) + self.assertAllClose([[[0.4852941]], [[-.6]]], + sess.run(left_node_contribs_list)) + self.assertAllClose([[[-0.75]], [[-0.014925]]], + sess.run(right_node_contribs_list)) + def testMakeStatsSummarySimple(self): """Simple test for MakeStatsSummary.""" with self.test_session(): diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt index fd9be8c759..53a903c239 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt @@ -21,7 +21,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'feature_columns\', \'n_batches_per_layer\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'n_trees\', \'max_depth\', \'learning_rate\', \'l1_regularization\', \'l2_regularization\', \'tree_complexity\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'\', \'None\', \'None\', \'100\', \'6\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'None\'], " + argspec: "args=[\'self\', \'feature_columns\', \'n_batches_per_layer\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'n_trees\', \'max_depth\', \'learning_rate\', \'l1_regularization\', \'l2_regularization\', \'tree_complexity\', \'min_node_weight\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'\', \'None\', \'None\', \'100\', \'6\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'None\'], " } member_method { name: "evaluate" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt index 6b305be43f..ba17c90de2 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt @@ -21,7 +21,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'feature_columns\', \'n_batches_per_layer\', \'model_dir\', \'label_dimension\', \'weight_column\', \'n_trees\', \'max_depth\', \'learning_rate\', \'l1_regularization\', \'l2_regularization\', \'tree_complexity\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'\', \'None\', \'100\', \'6\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'None\'], " + argspec: "args=[\'self\', \'feature_columns\', \'n_batches_per_layer\', \'model_dir\', \'label_dimension\', \'weight_column\', \'n_trees\', \'max_depth\', \'learning_rate\', \'l1_regularization\', \'l2_regularization\', \'tree_complexity\', \'min_node_weight\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'\', \'None\', \'100\', \'6\', \'0.1\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'None\'], " } member_method { name: "evaluate" -- GitLab From cbf1fc8ba96a6e9d6a36a2d09a82ea1ff9af2752 Mon Sep 17 00:00:00 2001 From: Younghee Kwon Date: Mon, 16 Apr 2018 19:10:10 -0700 Subject: [PATCH 030/434] BoostedTreesEstimator in contrib: train_in_memory works with input_fns returning data.Dataset. 
Only one batch of data is expected, so dataset.batch() is disallowed, and dataset.repeat() will be ignored (only the first one would be used) PiperOrigin-RevId: 193137094 --- .../python/estimator/boosted_trees.py | 38 +++- .../python/estimator/boosted_trees_test.py | 80 +++++++-- .../python/estimator/canned/boosted_trees.py | 149 +++++++++++----- .../estimator/canned/boosted_trees_test.py | 167 +++++++++++++++++- 4 files changed, 362 insertions(+), 72 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py index 00356ce0ca..bd641014e9 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -17,10 +17,22 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator import estimator from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees +def _validate_input_fn_and_repeat_dataset(train_input_fn): + """Validates whether the input_fn is valid, and repeat() if tf.Dataset.""" + def _input_fn(): + result_input_fn = train_input_fn() + if isinstance(result_input_fn, dataset_ops.Dataset): + return result_input_fn.repeat() + return result_input_fn + + return _input_fn + + class _BoostedTreesEstimator(estimator.Estimator): """An Estimator for Tensorflow Boosted Trees models.""" @@ -113,10 +125,13 @@ def boosted_trees_classifier_train_in_memory( bucketized_feature_2 = bucketized_column( numeric_column('feature_2'), BUCKET_BOUNDARIES_2) - def input_fn_train(): + def train_input_fn(): dataset = create-dataset-from-training-data - # Don't use repeat or cache, since it is assumed to be one epoch - # This is either tf.data.Dataset, or a tuple of feature dict and label. + # This is tf.data.Dataset of a tuple of feature dict and label. 
+ # e.g. Dataset.zip((Dataset.from_tensors({'f1': f1_array, ...}), + # Dataset.from_tensors(label_array))) + # The returned Dataset shouldn't be batched. + # If Dataset repeats, only the first repetition would be used for training. return dataset classifier = boosted_trees_classifier_train_in_memory( @@ -210,7 +225,9 @@ def boosted_trees_classifier_train_in_memory( in_memory_classifier = estimator.Estimator( model_fn=_model_fn, model_dir=model_dir, config=config) - in_memory_classifier.train(input_fn=train_input_fn, hooks=train_hooks) + in_memory_classifier.train( + input_fn=_validate_input_fn_and_repeat_dataset(train_input_fn), + hooks=train_hooks) return in_memory_classifier # pylint: enable=protected-access @@ -241,10 +258,13 @@ def boosted_trees_regressor_train_in_memory( bucketized_feature_2 = bucketized_column( numeric_column('feature_2'), BUCKET_BOUNDARIES_2) - def input_fn_train(): + def train_input_fn(): dataset = create-dataset-from-training-data - # Don't use repeat or cache, since it is assumed to be one epoch - # This is either tf.data.Dataset, or a tuple of feature dict and label. + # This is tf.data.Dataset of a tuple of feature dict and label. + # e.g. Dataset.zip((Dataset.from_tensors({'f1': f1_array, ...}), + # Dataset.from_tensors(label_array))) + # The returned Dataset shouldn't be batched. + # If Dataset repeats, only the first repetition would be used for training. 
return dataset regressor = boosted_trees_regressor_train_in_memory( @@ -329,7 +349,9 @@ def boosted_trees_regressor_train_in_memory( in_memory_regressor = estimator.Estimator( model_fn=_model_fn, model_dir=model_dir, config=config) - in_memory_regressor.train(input_fn=train_input_fn, hooks=train_hooks) + in_memory_regressor.train( + input_fn=_validate_input_fn_and_repeat_dataset(train_input_fn), + hooks=train_hooks) return in_memory_regressor # pylint: enable=protected-access diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py index eee5910687..76cbefe5e9 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py @@ -21,6 +21,7 @@ import numpy as np from tensorflow.contrib.estimator.python.estimator import boosted_trees from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.feature_column import feature_column @@ -49,12 +50,24 @@ def _make_train_input_fn(is_classification): """Makes train input_fn for classification/regression.""" def _input_fn(): - features = dict(FEATURES_DICT) - if is_classification: - labels = CLASSIFICATION_LABELS - else: - labels = REGRESSION_LABELS - return features, labels + features_dict = dict(FEATURES_DICT) + labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS + return features_dict, labels + + return _input_fn + + +def _make_train_input_fn_dataset(is_classification): + """Makes input_fn using Dataset.""" + + def _input_fn(): + features_dict = dict(FEATURES_DICT) + labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS + ds = dataset_ops.Dataset.zip( + 
(dataset_ops.Dataset.from_tensors(features_dict), + dataset_ops.Dataset.from_tensors(labels) + )) + return ds return _input_fn @@ -132,15 +145,13 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) est = boosted_trees.boosted_trees_classifier_train_in_memory( - train_input_fn=train_input_fn, - feature_columns=self._feature_columns, - n_trees=1, - max_depth=5) + train_input_fn=train_input_fn, feature_columns=self._feature_columns, + n_trees=1, max_depth=5) # It will stop after 5 steps because of the max depth and num trees. self._assert_checkpoint( est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) - # Check eval. + # Check evaluate and predict. eval_res = est.evaluate(input_fn=train_input_fn, steps=1) self.assertAllClose(eval_res['accuracy'], 1.0) # Validate predictions. @@ -148,24 +159,59 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllClose([[0], [1], [1], [0], [0]], [pred['class_ids'] for pred in predictions]) + def testBinaryClassifierTrainInMemoryWithDataset(self): + train_input_fn = _make_train_input_fn_dataset(is_classification=True) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.boosted_trees_classifier_train_in_memory( + train_input_fn=train_input_fn, feature_columns=self._feature_columns, + n_trees=1, max_depth=5) + # It will stop after 5 steps because of the max depth and num trees. + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + + # Check evaluate and predict. 
+ eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['accuracy'], 1.0) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose([[0], [1], [1], [0], [0]], + [pred['class_ids'] for pred in predictions]) + def testRegressorTrainInMemoryAndEvalAndInfer(self): train_input_fn = _make_train_input_fn(is_classification=False) predict_input_fn = numpy_io.numpy_input_fn( x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) est = boosted_trees.boosted_trees_regressor_train_in_memory( - train_input_fn=train_input_fn, - feature_columns=self._feature_columns, - n_trees=1, - max_depth=5) + train_input_fn=train_input_fn, feature_columns=self._feature_columns, + n_trees=1, max_depth=5) # It will stop after 5 steps because of the max depth and num trees. self._assert_checkpoint( est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) - # Check eval. + # Check evaluate and predict. + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 2.478283) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) + + def testRegressorTrainInMemoryWithDataset(self): + train_input_fn = _make_train_input_fn_dataset(is_classification=False) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.boosted_trees_regressor_train_in_memory( + train_input_fn=train_input_fn, feature_columns=self._feature_columns, + n_trees=1, max_depth=5) + # It will stop after 5 steps because of the max depth and num trees. + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + # Check evaluate and predict. 
eval_res = est.evaluate(input_fn=train_input_fn, steps=1) self.assertAllClose(eval_res['average_loss'], 2.478283) - # Validate predictions. predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose( [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 536bd2bf81..085dace1b3 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -32,6 +32,7 @@ from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.summary import summary @@ -50,6 +51,32 @@ _HOLD_FOR_MULTI_CLASS_SUPPORT = object() _HOLD_FOR_MULTI_DIM_SUPPORT = object() +def _get_max_buckets(feature_columns): + """Gets the maximum number of buckets from feature_columns. + + Args: + feature_columns: a list/set of tf.feature_column. + + Returns: + max_buckets: the maximum number of buckets among bucketized_columns. + + Raises: + ValueError: when unsupported feature_columns are given. + """ + if not feature_columns: + raise ValueError('feature_columns must be a non-empty list/set of ' + 'tf.feature_column.') + max_buckets = 1 + for fc in feature_columns: + if isinstance(fc, feature_column_lib._BucketizedColumn): # pylint:disable=protected-access + # N boundaries creates (N+1) buckets. + max_buckets = max(max_buckets, len(fc.boundaries) + 1) + else: + raise ValueError('For now, only bucketized_column is supported but ' + 'got: {}'.format(fc)) + return max_buckets + + def _get_transformed_features(features, feature_columns): """Gets the transformed features from features/feature_columns pair. 
@@ -59,36 +86,31 @@ def _get_transformed_features(features, feature_columns): Returns: result_features: a list of the transformed features, sorted by the name. - num_buckets: the maximum number of buckets across bucketized_columns. Raises: ValueError: when unsupported features/columns are tried. """ - num_buckets = 1 # pylint:disable=protected-access for fc in feature_columns: - if isinstance(fc, feature_column_lib._BucketizedColumn): - # N boundaries creates (N+1) buckets. - num_buckets = max(num_buckets, len(fc.boundaries) + 1) - else: + if not isinstance(fc, feature_column_lib._BucketizedColumn): raise ValueError('For now, only bucketized_column is supported but ' 'got: {}'.format(fc)) - transformed = feature_column_lib._transform_features(features, - feature_columns) + transformed_features = feature_column_lib._transform_features( + features, feature_columns) # pylint:enable=protected-access result_features = [] - for column in sorted(transformed, key=lambda tc: tc.name): + for column in sorted(transformed_features, key=lambda tc: tc.name): source_name = column.source_column.name - squeezed_tensor = array_ops.squeeze(transformed[column], axis=1) + squeezed_tensor = array_ops.squeeze(transformed_features[column], axis=1) if len(squeezed_tensor.shape) > 1: raise ValueError('For now, only supports features equivalent to rank 1 ' 'but column `{}` got: {}'.format( source_name, features[source_name].shape)) result_features.append(squeezed_tensor) - return result_features, num_buckets + return result_features -def _keep_as_local_variable(tensor, name=None): +def _local_variable(tensor, name=None): """Stores a tensor as a local Variable for faster read.""" return variable_scope.variable( initial_value=tensor, @@ -98,6 +120,48 @@ def _keep_as_local_variable(tensor, name=None): name=name) +def _cache_transformed_features(features, feature_columns, batch_size): + """Transform features and cache, then returns (cached_features, cache_op).""" + num_features = 
len(feature_columns) + cached_features = [ + _local_variable( + array_ops.zeros([batch_size], dtype=dtypes.int32), + name='cached_feature_{}'.format(i)) + for i in range(num_features) + ] + are_features_cached = _local_variable(False, name='are_features_cached') + + def cache_features_and_return(): + """Caches transformed features. + + The intention is to hide get_transformed_features() from the graph by + caching the result except the first step, since bucketize operation + (inside get_transformed_features) is expensive. + + Returns: + input_feature_list: a list of input features. + cache_flip_op: op to add to graph to make sure cache update is included in + the graph. + """ + + transformed_features = _get_transformed_features(features, feature_columns) + cached = [ + state_ops.assign(cached_features[i], transformed_features[i]) + for i in range(num_features) + ] + # TODO(youngheek): Try other combination of dependencies so that the + # function returns a single result, not a tuple. + with ops.control_dependencies(cached): + cache_flip_op = are_features_cached.assign(True) + return cached, cache_flip_op + + input_feature_list, cache_flip_op = control_flow_ops.cond( + are_features_cached, + lambda: (cached_features, control_flow_ops.no_op()), + cache_features_and_return) + return input_feature_list, cache_flip_op + + class _CacheTrainingStatesUsingHashTable(object): """Caching logits, etc. using MutableHashTable.""" @@ -186,13 +250,13 @@ class _CacheTrainingStatesUsingVariables(object): logits_dimension: a constant (int) for the dimension of logits.
""" self._logits_dimension = logits_dimension - self._tree_ids = _keep_as_local_variable( + self._tree_ids = _local_variable( array_ops.zeros([batch_size], dtype=dtypes.int32), name='tree_ids_cache') - self._node_ids = _keep_as_local_variable( + self._node_ids = _local_variable( array_ops.zeros([batch_size], dtype=dtypes.int32), name='node_ids_cache') - self._logits = _keep_as_local_variable( + self._logits = _local_variable( array_ops.zeros([batch_size, logits_dimension], dtype=dtypes.float32), name='logits_cache') @@ -290,33 +354,38 @@ def _bt_model_fn( 'When train_in_memory is enabled, input_fn should return the entire ' 'dataset as a single batch, and n_batches_per_layer should be set as ' '1.') + if (not config.is_chief or config.num_worker_replicas > 1 or + config.num_ps_replicas > 0): + raise ValueError('train_in_memory is supported only for ' + 'non-distributed training.') worker_device = control_flow_ops.no_op().device # maximum number of splits possible in the whole tree =2^(D-1)-1 # TODO(youngheek): perhaps storage could be optimized by storing stats with # the dimension max_splits_per_layer, instead of max_splits (for the entire # tree). max_splits = (1 << tree_hparams.max_depth) - 1 + max_buckets = _get_max_buckets(feature_columns) + train_op = [] with ops.name_scope(name) as name: # Prepare. global_step = training_util.get_or_create_global_step() - input_feature_list, num_buckets = _get_transformed_features( - features, feature_columns) - if train_in_memory and mode == model_fn.ModeKeys.TRAIN: - input_feature_list = [ - _keep_as_local_variable(feature) for feature in input_feature_list - ] - num_features = len(input_feature_list) - - cache = None - if mode == model_fn.ModeKeys.TRAIN: - if train_in_memory and is_single_machine: # maybe just train_in_memory? 
- batch_size = array_ops.shape(input_feature_list[0])[0] - cache = _CacheTrainingStatesUsingVariables(batch_size, - head.logits_dimension) - elif example_id_column_name: + num_features = len(feature_columns) + # Extract input features and set up cache for training. + training_state_cache = None + if mode == model_fn.ModeKeys.TRAIN and train_in_memory: + # cache transformed features as well for in-memory training. + batch_size = array_ops.shape(labels)[0] + input_feature_list, input_cache_op = _cache_transformed_features( + features, feature_columns, batch_size) + train_op.append(input_cache_op) + training_state_cache = _CacheTrainingStatesUsingVariables( + batch_size, head.logits_dimension) + else: + input_feature_list = _get_transformed_features(features, feature_columns) + if mode == model_fn.ModeKeys.TRAIN and example_id_column_name: example_ids = features[example_id_column_name] - cache = _CacheTrainingStatesUsingHashTable(example_ids, - head.logits_dimension) + training_state_cache = _CacheTrainingStatesUsingHashTable( + example_ids, head.logits_dimension) # Create Ensemble resources. tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) @@ -340,11 +409,12 @@ def _bt_model_fn( # TODO(soroush): Do partial updates if this becomes a bottleneck. ensemble_reload = local_tree_ensemble.deserialize( *tree_ensemble.serialize()) - if cache: - cached_tree_ids, cached_node_ids, cached_logits = cache.lookup() + if training_state_cache: + cached_tree_ids, cached_node_ids, cached_logits = ( + training_state_cache.lookup()) else: # Always start from the beginning when no cache is set up. - batch_size = array_ops.shape(input_feature_list[0])[0] + batch_size = array_ops.shape(labels)[0] cached_tree_ids, cached_node_ids, cached_logits = ( array_ops.zeros([batch_size], dtype=dtypes.int32), array_ops.zeros([batch_size], dtype=dtypes.int32), @@ -368,9 +438,8 @@ def _bt_model_fn( # Create training graph. 
def _train_op_fn(loss): """Run one training iteration.""" - train_op = [] - if cache: - train_op.append(cache.insert(tree_ids, node_ids, logits)) + if training_state_cache: + train_op.append(training_state_cache.insert(tree_ids, node_ids, logits)) if closed_form_grad_and_hess_fn: gradients, hessians = closed_form_grad_and_hess_fn(logits, labels) else: @@ -385,7 +454,7 @@ def _bt_model_fn( hessians=hessians, bucketized_features_list=[input_feature_list[f]], max_splits=max_splits, - num_buckets=num_buckets), + num_buckets=max_buckets), axis=0) for f in range(num_features) ] @@ -422,7 +491,7 @@ def _bt_model_fn( summary_accumulator = data_flow_ops.ConditionalAccumulator( dtype=dtypes.float32, # The stats consist of gradients and hessians (the last dimension). - shape=[num_features, max_splits, num_buckets, 2], + shape=[num_features, max_splits, max_buckets, 2], shared_name='stats_summary_accumulator') apply_grad = summary_accumulator.apply_grad( array_ops.stack(stats_summary_list, axis=0), stamp_token) diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 56e67a6707..c8c52d3bc6 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import numpy as np from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator import model_fn from tensorflow.python.estimator import run_config from tensorflow.python.estimator.canned import boosted_trees @@ -58,13 +59,32 @@ def _make_train_input_fn(is_classification): """Makes train input_fn for classification/regression.""" def _input_fn(): - features = dict(FEATURES_DICT) - features[EXAMPLE_ID_COLUMN] = constant_op.constant(EXAMPLE_IDS) - if is_classification: - labels = CLASSIFICATION_LABELS + features_dict = dict(FEATURES_DICT) + 
features_dict[EXAMPLE_ID_COLUMN] = constant_op.constant(EXAMPLE_IDS) + labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS + return features_dict, labels + + return _input_fn + + +def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None): + """Makes input_fn using Dataset.""" + + def _input_fn(): + features_dict = dict(FEATURES_DICT) + features_dict[EXAMPLE_ID_COLUMN] = constant_op.constant(EXAMPLE_IDS) + labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS + if batch: + ds = dataset_ops.Dataset.zip( + (dataset_ops.Dataset.from_tensor_slices(features_dict), + dataset_ops.Dataset.from_tensor_slices(labels))).batch(batch) else: - labels = REGRESSION_LABELS - return features, labels + ds = dataset_ops.Dataset.zip( + (dataset_ops.Dataset.from_tensors(features_dict), + dataset_ops.Dataset.from_tensors(labels))) + # repeat indefinitely by default, or stop at the given step. + ds = ds.repeat(repeat) + return ds return _input_fn @@ -125,9 +145,28 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): num_steps = 100 # Train for a few steps, and validate final checkpoint. est.train(train_input_fn, steps=num_steps) + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose([[0], [1], [1], [0], [0]], + [pred['class_ids'] for pred in predictions]) + def testTrainClassifierWithDataset(self): + train_input_fn = _make_train_input_fn_dataset(is_classification=True) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + est.train(train_input_fn, steps=100) # will stop after 5 steps anyway. 
+ self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['accuracy'], 1.0) predictions = list(est.predict(input_fn=predict_input_fn)) - # All labels are correct. self.assertAllClose([[0], [1], [1], [0], [0]], [pred['class_ids'] for pred in predictions]) @@ -166,12 +205,126 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): est.train(train_input_fn, steps=num_steps) self._assert_checkpoint( est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) + + def testTrainRegressorWithDataset(self): + train_input_fn = _make_train_input_fn_dataset(is_classification=False) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.BoostedTreesRegressor( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + est.train(train_input_fn, steps=100) # will stop after 5 steps anyway. + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 2.478283) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) + + def testTrainRegressorWithDatasetBatch(self): + # The batch_size as the entire data size should yield the same result as + # dataset without batching. 
+ train_input_fn = _make_train_input_fn_dataset( + is_classification=False, batch=5) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.BoostedTreesRegressor( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + est.train(train_input_fn, steps=100) # will stop after 5 steps anyway. + self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 2.478283) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) + + def testTrainRegressorWithDatasetLargerBatch(self): + # The batch_size as the multiple of the entire data size should still yield + # the same result. + train_input_fn = _make_train_input_fn_dataset( + is_classification=False, batch=15) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.BoostedTreesRegressor( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + est.train(train_input_fn, steps=100) # will stop after 5 steps anyway. 
+ self._assert_checkpoint( + est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5) + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 2.478283) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], + [pred['predictions'] for pred in predictions]) + + def testTrainRegressorWithDatasetSmallerBatch(self): + # Even when using small batches, if (n_batches_per_layer * batch_size) makes + # the same entire data size, the result should be the same. + train_input_fn = _make_train_input_fn_dataset( + is_classification=False, batch=1) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + est = boosted_trees.BoostedTreesRegressor( + feature_columns=self._feature_columns, + n_batches_per_layer=5, + n_trees=1, + max_depth=5) + # Train stops after (n_batches_per_layer * n_trees * max_depth) steps. + est.train(train_input_fn, steps=100) + self._assert_checkpoint( + est.model_dir, global_step=25, finalized_trees=1, attempted_layers=5) + # 5 batches = one epoch. + eval_res = est.evaluate(input_fn=train_input_fn, steps=5) + self.assertAllClose(eval_res['average_loss'], 2.478283) predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose( [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]], [pred['predictions'] for pred in predictions]) + def testTrainRegressorWithDatasetWhenInputIsOverEarlier(self): + train_input_fn = _make_train_input_fn_dataset( + is_classification=False, repeat=3) # to stop input after 3 steps. + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.BoostedTreesRegressor( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + # Note that training will stop when input exhausts. 
+ # This might not be a typical pattern, but dataset.repeat(3) causes + # the input stream to cease after 3 steps. + est.train(train_input_fn, steps=100) + self._assert_checkpoint( + est.model_dir, global_step=3, finalized_trees=0, attempted_layers=3) + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 3.777295) + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertAllClose( + [[0.353850], [0.254100], [0.106850], [0.712100], [1.012100]], + [pred['predictions'] for pred in predictions]) + class ModelFnTests(test_util.TensorFlowTestCase): """Tests bt_model_fn including unexposed internal functionalities.""" -- GitLab From 421d1c077053e6e38e4c9cee99641edcd4d9ca1e Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 11 Apr 2018 18:20:19 -0700 Subject: [PATCH 031/434] In model_to_estimator, only run get_weights when there are initialized Keras variables(which assumes there exists a session). Otherwise create a session so that we can run get_config(). Actually fix #18193. 
PiperOrigin-RevId: 192541442 --- .../python/keras/_impl/keras/estimator.py | 45 +++++++++----- .../keras/_impl/keras/estimator_test.py | 61 ++++++++++--------- 2 files changed, 63 insertions(+), 43 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py index 8043242b70..b922a6c683 100644 --- a/tensorflow/python/keras/_impl/keras/estimator.py +++ b/tensorflow/python/keras/_impl/keras/estimator.py @@ -26,7 +26,6 @@ from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.estimator import export as export_lib from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator import run_config as run_config_lib -from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib @@ -38,6 +37,7 @@ from tensorflow.python.keras._impl.keras.engine.network import Network from tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_module +from tensorflow.python.ops import variables as variables_module from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants from tensorflow.python.training import saver as saver_lib @@ -55,6 +55,19 @@ def _cast_tensor_to_floatx(x): return math_ops.cast(x, K.floatx()) +def _any_variable_initalized(): + """Check if any variable has been initialized in the Keras model. + + Returns: + boolean, True if at least one variable has been initalized, else False. 
+ """ + variables = variables_module.global_variables() + for v in variables: + if getattr(v, '_keras_initialized', False): + return True + return False + + def _create_ordered_io(keras_model, estimator_io, is_input=True): """Create a list of tensors from IO dictionary based on Keras IO order. @@ -396,7 +409,8 @@ def _save_first_checkpoint(keras_model, estimator, custom_objects, custom_objects) # save to checkpoint with session.Session(config=estimator._session_config) as sess: - model.set_weights(keras_weights) + if keras_weights: + model.set_weights(keras_weights) # Make update ops and initialize all variables. if not model.train_function: # pylint: disable=protected-access @@ -466,20 +480,21 @@ def model_to_estimator(keras_model=None, estimator = estimator_lib.Estimator( keras_model_fn, model_dir=model_dir, config=config) - old_session = K._SESSION - # Pass the config into keras backend's default session. - sess = session.Session(config=estimator._session_config) - K.set_session(sess) - try: - keras_weights = keras_model.get_weights() - except errors.FailedPreconditionError as e: - if old_session is None: - raise e - logging.warning( - 'The Keras backend session has already been ' - 'set. The _session_config passed to model_to_estimator is not used.') - K.set_session(old_session) + # Check if we need to call get_weights: + if _any_variable_initalized(): keras_weights = keras_model.get_weights() + # Warn if config passed to estimator tries to update GPUOptions. If a + # session has already been created, the GPUOptions passed to the first + # session sticks. + if estimator._session_config.HasField('gpu_options'): + logging.warning( + 'The Keras backend session has already been set. ' + 'The _session_config passed to model_to_estimator will not be used.') + else: + # Pass the config into keras backend's default session. 
+ sess = session.Session(config=estimator._session_config) + K.set_session(sess) + keras_weights = None if keras_model._is_graph_network: # TODO(yifeif): move checkpoint initialization to scaffold.init_fn diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py index 27b7ec7dd4..653cdc01e2 100644 --- a/tensorflow/python/keras/_impl/keras/estimator_test.py +++ b/tensorflow/python/keras/_impl/keras/estimator_test.py @@ -27,10 +27,12 @@ import numpy as np from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils from tensorflow.python.keras._impl.keras.applications import mobilenet +from tensorflow.python.keras._impl.keras.optimizers import SGD from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -443,8 +445,9 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): model = simple_functional_model() model.compile( loss='categorical_crossentropy', optimizer='adam', metrics=['acc']) - est_keras = keras.estimator.model_to_estimator( - keras_model=model, config=self._config) + with self.test_session(): + est_keras = keras.estimator.model_to_estimator( + keras_model=model, config=self._config) with self.test_session(): with self.assertRaises(ValueError): @@ -497,20 +500,22 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): model_dir=tempfile.mkdtemp(dir=self._base_dir)) def test_gpu_config(self): - keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() - keras_model.compile( - loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['mse', 
keras.metrics.categorical_accuracy]) + with ops.Graph().as_default(): + keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() + keras_model.compile( + loss='categorical_crossentropy', + optimizer='rmsprop', + metrics=['mse', keras.metrics.categorical_accuracy]) - gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.3) - sess_config = config_pb2.ConfigProto(gpu_options=gpu_options) - self._config._session_config = sess_config - keras.estimator.model_to_estimator( - keras_model=keras_model, config=self._config) - self.assertEqual(keras.backend.get_session() - ._config.gpu_options.per_process_gpu_memory_fraction, - gpu_options.per_process_gpu_memory_fraction) + gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.3) + sess_config = config_pb2.ConfigProto(gpu_options=gpu_options) + self._config._session_config = sess_config + keras.estimator.model_to_estimator( + keras_model=keras_model, config=self._config) + self.assertEqual( + keras.backend.get_session() + ._config.gpu_options.per_process_gpu_memory_fraction, + gpu_options.per_process_gpu_memory_fraction) def test_pretrained_weights(self): keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() @@ -518,19 +523,19 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): loss='categorical_crossentropy', optimizer=rmsprop.RMSPropOptimizer(1e-3), metrics=['mse', keras.metrics.categorical_accuracy]) - - keras_model.train_on_batch( - np.random.random((10,) + _INPUT_SIZE), np.random.random((10, - _NUM_CLASS))) - weights = keras_model.get_weights() - keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() - keras_model.set_weights(weights) - keras_model.compile( - loss='categorical_crossentropy', - optimizer=rmsprop.RMSPropOptimizer(1e-3), - metrics=['mse', keras.metrics.categorical_accuracy]) - keras.estimator.model_to_estimator( - keras_model=keras_model, config=self._config) + with self.test_session(): + keras_model.train_on_batch( + 
np.random.random((10,) + _INPUT_SIZE), + np.random.random((10, _NUM_CLASS))) + weights = keras_model.get_weights() + keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() + keras_model.set_weights(weights) + keras_model.compile( + loss='categorical_crossentropy', + optimizer=SGD(lr=0.0001, momentum=0.9), + metrics=['mse', keras.metrics.categorical_accuracy]) + keras.estimator.model_to_estimator( + keras_model=keras_model, config=self._config) if __name__ == '__main__': -- GitLab From ba25b8ba9f88df5db8c11c0bec9b27c8151af7d7 Mon Sep 17 00:00:00 2001 From: James Qin Date: Mon, 16 Apr 2018 14:52:41 -0700 Subject: [PATCH 032/434] Increase softmax gpu unittest numeric stability PiperOrigin-RevId: 193103363 --- tensorflow/python/kernel_tests/softmax_op_test.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py index 981f96b74d..dc4d4dbeab 100644 --- a/tensorflow/python/kernel_tests/softmax_op_test.py +++ b/tensorflow/python/kernel_tests/softmax_op_test.py @@ -39,6 +39,10 @@ class SoftmaxTest(test.TestCase): dim = len(features.shape) - 1 one_only_on_dim = list(features.shape) one_only_on_dim[dim] = 1 + is_fp16 = features.dtype == np.float16 + if is_fp16: + # Do the compute in fp32 and cast the input back to fp32. 
+ features = features.astype(np.float32) e = np.exp(features - np.reshape( np.amax( features, axis=dim), one_only_on_dim)) @@ -47,6 +51,8 @@ class SoftmaxTest(test.TestCase): res = np.log(softmax) else: res = softmax + if is_fp16: + res = res.astype(np.float16) return res def _testSoftmax(self, np_features, dim=-1, log=False, use_gpu=False): @@ -125,8 +131,8 @@ class SoftmaxTest(test.TestCase): "Test only applicable when running on GPUs") def testFloatGPU(self): if test.is_gpu_available(cuda_only=True): - rows = [2**x + np.random.randint(0, 1024) for x in range(1, 10)] - cols = [2**x + np.random.randint(0, 1024) for x in range(1, 10)] + rows = [2**x + np.random.randint(0, 16) for x in range(1, 4)] + cols = [2**x + np.random.randint(0, 16) for x in range(1, 4)] for row, col in zip(rows, cols): logging.info("Testing softmax float dtype in shape [%d, %d]", row, col) data = np.random.rand(row, col) @@ -140,8 +146,8 @@ class SoftmaxTest(test.TestCase): "Test only applicable when running on GPUs") def testHalfGPU(self): if test.is_gpu_available(cuda_only=True): - rows = [2**x + np.random.randint(0, 1024) for x in range(1, 8)] - cols = [2**x + np.random.randint(0, 1024) for x in range(1, 8)] + rows = [2**x + np.random.randint(0, 16) for x in range(1, 4)] + cols = [2**x + np.random.randint(0, 16) for x in range(1, 4)] for row, col in zip(rows, cols): logging.info("Testing softmax half dtype in shape [%d, %d]", row, col) data = np.random.rand(row, col) -- GitLab From b5f8c3531924c56cf4866f57ce0ccea1b72b289e Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Tue, 17 Apr 2018 10:53:07 -0700 Subject: [PATCH 033/434] Enable consumption of GIT_TAG_OVERRIDE env var in release build script. (#18579) Enable consumption of GIT_TAG_OVERRIDE env var in release build script. 
--- tensorflow/contrib/cmake/tf_core_framework.cmake | 2 +- tensorflow/tools/ci_build/builds/pip.sh | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index a1c320347f..73cadc58ff 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -276,7 +276,7 @@ add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo) add_custom_command(OUTPUT ${VERSION_INFO_CC} COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py - --raw_generate ${VERSION_INFO_CC} + ARGS --raw_generate ${VERSION_INFO_CC} --git_tag_override=${GIT_TAG_OVERRIDE} DEPENDS __force_rebuild) set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index 82042b93c0..5fa75e1d61 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -123,6 +123,10 @@ done BAZEL_FLAGS=$(str_strip "${BAZEL_FLAGS}") +if [[ -z "$GIT_TAG_OVERRIDE" ]]; then + BAZEL_FLAGS+=" --action_env=GIT_TAG_OVERRIDE" +fi + echo "Using Bazel flags: ${BAZEL_FLAGS}" PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package" -- GitLab From 6e9d3ad2aad1d6fc417882a7f5c7aba22b7df18e Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 17 Apr 2018 14:59:58 -0700 Subject: [PATCH 034/434] [tf.data] Fix a device placement issue in `prefetch_to_device()`. (#18607) * [tf.data] Fix a device placement issue in `prefetch_to_device()`. Previously, the `iterator_get_device()` op was being infeasibly colocated with both the iterator and placed on the prefetch target device. Move the construction of that op outside the `with device():` block to fix this. Also enable the relevant test to run as a CUDA test. * Import the cuda_py_test rule. 
--- tensorflow/contrib/data/python/kernel_tests/BUILD | 7 +++---- tensorflow/contrib/data/python/ops/prefetching_ops.py | 6 ++++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 7270d533c6..fa5662ce0b 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -4,7 +4,7 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "py_test", "tf_py_test") +load("//tensorflow:tensorflow.bzl", "cuda_py_test", "py_test", "tf_py_test") py_test( name = "batch_dataset_op_test", @@ -473,12 +473,11 @@ py_test( ], ) -py_test( +cuda_py_test( name = "prefetching_ops_test", size = "small", srcs = ["prefetching_ops_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ "//tensorflow/contrib/data/python/ops:prefetching_ops", "//tensorflow/core:protos_all_py", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py index 89c04dc89a..e4c9f8b58a 100644 --- a/tensorflow/contrib/data/python/ops/prefetching_ops.py +++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py @@ -114,11 +114,13 @@ class _PrefetchToDeviceIterator(object): ret = remote_iterator.get_next() return nest.flatten(sparse.serialize_sparse_tensors(ret)) + iterator_device = gen_dataset_ops.iterator_get_device( + self._input_iterator._iterator_resource) + with ops.device(device): self._buffering_resource = function_buffering_resource( f=_prefetch_fn, - target_device=gen_dataset_ops.iterator_get_device( - self._input_iterator._iterator_resource), + target_device=iterator_device, string_arg=input_iterator_handle, buffer_size=buffer_size, shared_name=shared_name) -- GitLab From 77586aefab8f5be9677659099ebe5467559c2d37 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Tue, 17 Apr 
2018 16:18:07 -0700 Subject: [PATCH 035/434] Update version strings for 1.8.0rc1. --- tensorflow/core/public/version.h | 2 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 22 +++++++++---------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 4 ++-- tensorflow/tools/pip_package/setup.py | 2 +- 8 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 0ca7d8475f..ba69efb289 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc0" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 995b8ae666..8c165aad52 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 2938a8f7ee..26cbcc9a9b 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install 
this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index c87eacfa93..1b0bbdba7b 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.8.0-rc0 + 1.8.0-rc1 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.8.0-rc0 + 1.8.0-rc1 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.8.0-rc0 + 1.8.0-rc1 org.tensorflow libtensorflow_jni_gpu - 1.8.0-rc0 + 1.8.0-rc1 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. 
Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc1.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.8.0-rc0.jar HelloTF.java
+
javac -cp libtensorflow-1.8.0-rc1.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.8.0-rc0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.8.0-rc1.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.8.0-rc0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.8.0-rc1.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 8387289fcf..f19f827e25 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -194,7 +194,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -299,7 +299,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -485,7 +485,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl ## Validate your installation @@ -659,14 +659,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -678,14 +678,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -697,14 +697,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
 
@@ -716,14 +716,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index a237d1af54..ff6c2f5e44 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl @@ -524,7 +524,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
 
@@ -532,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 677e3329b6..d48a6ee550 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -350,10 +350,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.8.0rc0 on Linux: +for TensorFlow 1.8.0rc1 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-py2-none-any.whl
 
## Validate your installation diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index f676f040ad..6da3223d33 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.8.0-rc0' +_VERSION = '1.8.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From cf836c4f6b3067bda14dd0ee8455d99c19423d5a Mon Sep 17 00:00:00 2001 From: Junpeng Lao Date: Wed, 18 Apr 2018 10:45:49 +0200 Subject: [PATCH 036/434] Add test --- .../python/kernel_tests/bijectors/ordered_test.py | 14 +++++++------- .../distributions/python/ops/bijectors/ordered.py | 8 +++++--- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py index 1bcbfed6c3..2d49b4294e 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py @@ -38,12 +38,12 @@ class OrderedBijectorTest(test.TestCase): with self.test_session(): ordered = Ordered() self.assertEqual("ordered", ordered.name) - x = np.log([[2., 3, 4], [4., 8, 12]]) - y = [[0.2, 0.3, 0.4, 0.1], [0.16, 0.32, 0.48, 0.04]] + x = np.asarray([[2., 3, 4], [4., 8, 13]]) + y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]] self.assertAllClose(y, ordered.forward(x).eval()) self.assertAllClose(x, ordered.inverse(y).eval()) self.assertAllClose( - -np.sum(np.log(y), axis=1), + -np.sum(y[..., 1:], axis=-1), ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(), atol=0., rtol=1e-7) @@ -58,15 +58,15 @@ class OrderedBijectorTest(test.TestCase): ordered = Ordered() 
self.assertEqual("ordered", ordered.name) x = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32) - real_x = np.log([[2., 3, 4], [4., 8, 12]]) + real_x = np.asarray([[2., 3, 4], [4., 8, 13]]) y = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32) - real_y = [[0.2, 0.3, 0.4, 0.1], [0.16, 0.32, 0.48, 0.04]] + real_y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]] self.assertAllClose(real_y, ordered.forward(x).eval( feed_dict={x: real_x})) self.assertAllClose(real_x, ordered.inverse(y).eval( feed_dict={y: real_y})) self.assertAllClose( - -np.sum(np.log(real_y), axis=1), + -np.sum(y[..., 1:], axis=-1), ordered.inverse_log_det_jacobian(y, event_ndims=1).eval( feed_dict={y: real_y}), atol=0., @@ -82,7 +82,7 @@ class OrderedBijectorTest(test.TestCase): def testShapeGetters(self): with self.test_session(): x = tensor_shape.TensorShape([4]) - y = tensor_shape.TensorShape([5]) + y = tensor_shape.TensorShape([4]) bijector = Ordered(validate_args=True) self.assertAllEqual(y, bijector.forward_event_shape(x)) self.assertAllEqual(y.as_list(), diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py b/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py index ec8f660144..64cf2e6b56 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py @@ -37,6 +37,9 @@ class Ordered(bijector.Bijector): """Bijector which maps a tensor x_k that has increasing elements in the last dimension to an unconstrained tensor y_k. 
+ The inverse of the bijector applied to a normal random vector `X ~ N(0, 1)` + gives back a sorted random vector with the same distribution `Y ~ N(0, 1)` + On the last dimension of the tensor, Ordered bijector performs: `y[0] = x[0]` `y[1:] = math_ops.log(x[1:] - x[:-1])` @@ -79,7 +82,6 @@ class Ordered(bijector.Bijector): def _inverse_event_shape_tensor(self, output_shape): if self.validate_args: - # It is not possible for a negative shape so we need only check <= 1. is_greater_one = check_ops.assert_greater( output_shape[-1], 1, message="Need last dimension greater than 1.") output_shape = control_flow_ops.with_dependencies( @@ -108,7 +110,7 @@ class Ordered(bijector.Bijector): def _maybe_assert_valid_x(self, x): if not self.validate_args: return x - is_valid = check_ops.is_strictly_increasing( - x, + is_valid = check_ops.assert_positive( + x[..., 1:] - x[..., :-1], message="Forward transformation input must be strictly increasing.") return control_flow_ops.with_dependencies([is_valid], x) \ No newline at end of file -- GitLab From 5c52028c7337baafd8d92d36a29e0fa088393d06 Mon Sep 17 00:00:00 2001 From: Junpeng Lao Date: Wed, 18 Apr 2018 12:38:41 +0200 Subject: [PATCH 037/434] add forward logdet jacobian --- .../kernel_tests/bijectors/ordered_test.py | 14 ++++---------- .../python/ops/bijectors/ordered.py | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py index 2d49b4294e..63c8f1fb31 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py @@ -43,7 +43,7 @@ class OrderedBijectorTest(test.TestCase): self.assertAllClose(y, ordered.forward(x).eval()) self.assertAllClose(x, ordered.inverse(y).eval()) self.assertAllClose( - -np.sum(y[..., 1:], axis=-1), + 
np.sum(np.asarray(y)[..., 1:], axis=-1), ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(), atol=0., rtol=1e-7) @@ -66,7 +66,7 @@ class OrderedBijectorTest(test.TestCase): self.assertAllClose(real_x, ordered.inverse(y).eval( feed_dict={y: real_y})) self.assertAllClose( - -np.sum(y[..., 1:], axis=-1), + np.sum(np.asarray(real_y)[..., 1:], axis=-1), ordered.inverse_log_det_jacobian(y, event_ndims=1).eval( feed_dict={y: real_y}), atol=0., @@ -96,14 +96,8 @@ class OrderedBijectorTest(test.TestCase): def testBijectiveAndFinite(self): with self.test_session(): ordered = Ordered() - x = np.linspace(-50, 50, num=10).reshape(5, 2).astype(np.float32) - # Make y values on the simplex with a wide range. - y_0 = np.ones(5).astype(np.float32) - y_1 = (1e-5 * rng.rand(5)).astype(np.float32) - y_2 = (1e1 * rng.rand(5)).astype(np.float32) - y = np.array([y_0, y_1, y_2]) - y /= y.sum(axis=0) - y = y.T # y.shape = [5, 3] + x = np.sort(rng.randn(3, 10), axis=-1).astype(np.float32) + y = (rng.randn(3, 10)).astype(np.float32) assert_bijective_and_finite(ordered, x, y, event_ndims=1) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py b/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py index 64cf2e6b56..b2959cce31 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py @@ -24,7 +24,6 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn_ops from tensorflow.python.ops.distributions import bijector @@ -37,8 +36,9 @@ class Ordered(bijector.Bijector): """Bijector which maps a tensor x_k that has increasing elements in the last dimension to an unconstrained tensor y_k. 
- The inverse of the bijector applied to a normal random vector `X ~ N(0, 1)` - gives back a sorted random vector with the same distribution `Y ~ N(0, 1)` + The inverse of the bijector applied to a normal random vector `y ~ N(0, 1)` + gives back a sorted random vector with the same distribution `x ~ N(0, 1)` + where `x = sort(y)` On the last dimension of the tensor, Ordered bijector performs: `y[0] = x[0]` @@ -47,11 +47,11 @@ class Ordered(bijector.Bijector): Example Use: ```python - bijector.Ordered().forward(tf.log([2, 3, 4])) - # Result: [0.6931472, 3.6931472, 7.693147] + bijector.Ordered().forward([2, 3, 4]) + # Result: [2., 0., 0.] - bijector.Ordered().inverse([0.2, 0.3, 0.4]) - # Result: tf.log([2, 3, 4]) + bijector.Ordered().inverse([0.06428002, -1.07774478, -0.71530371]) + # Result: [0.06428002, 0.40464228, 0.8936858] ``` """ @@ -105,7 +105,9 @@ class Ordered(bijector.Bijector): return math_ops.reduce_sum(y[..., 1:], axis=-1) def _forward_log_det_jacobian(self, x): - pass + return -math_ops.reduce_sum( + math_ops.log(x[..., 1:] - x[..., :-1]), + axis=-1) def _maybe_assert_valid_x(self, x): if not self.validate_args: -- GitLab From 5dd4bf753b8f708db69a7ab455a25fb0bb9821a5 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Tue, 17 Apr 2018 11:54:48 -0700 Subject: [PATCH 038/434] Merge pull request #18601 from yongtang/18598-tf.compat.as_str Fix tf.compat.as_str returns bytes issue in Python 3 --- tensorflow/python/util/compat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/util/compat.py b/tensorflow/python/util/compat.py index 4163fcac79..738479c946 100644 --- a/tensorflow/python/util/compat.py +++ b/tensorflow/python/util/compat.py @@ -45,7 +45,6 @@ from tensorflow.python.util.tf_export import tf_export from tensorflow.python.util.tf_export import tf_export -@tf_export('compat.as_bytes', 'compat.as_str') def as_bytes(bytes_or_text, encoding='utf-8'): """Converts either 
bytes or unicode to `bytes`, using utf-8 encoding for text. @@ -68,7 +67,6 @@ def as_bytes(bytes_or_text, encoding='utf-8'): (bytes_or_text,)) -@tf_export('compat.as_text') def as_text(bytes_or_text, encoding='utf-8'): """Returns the given argument as a unicode string. @@ -93,8 +91,12 @@ def as_text(bytes_or_text, encoding='utf-8'): # Convert an object to a `str` in both Python 2 and 3. if _six.PY2: as_str = as_bytes + tf_export('compat.as_bytes', 'compat.as_str')(as_bytes) + tf_export('compat.as_text')(as_text) else: as_str = as_text + tf_export('compat.as_bytes')(as_bytes) + tf_export('compat.as_text', 'compat.as_str')(as_text) @tf_export('compat.as_str_any') -- GitLab From 48589205460a876a9ac783bd9b7fc3af99f8defb Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 18 Apr 2018 10:58:56 -0700 Subject: [PATCH 039/434] Fix issue where git_tag_override would fail if "-" in tag name. --- tensorflow/tools/git/gen_git_source.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py index db2580755b..7f0f325119 100755 --- a/tensorflow/tools/git/gen_git_source.py +++ b/tensorflow/tools/git/gen_git_source.py @@ -164,14 +164,18 @@ def get_git_version(git_base_path, git_tag_override): "git", str("--git-dir=%s/.git" % git_base_path), str("--work-tree=" + git_base_path), "describe", "--long", "--tags" ]).strip()) - if git_tag_override: + if git_tag_override and val: split_val = val.split("-") - if len(split_val) != 3: + if len(split_val) < 3: raise Exception( ("Expected git version in format 'TAG-COMMITS AFTER TAG-HASH' " "but got '%s'") % val) - split_val[0] = git_tag_override - val = bytes("-".join(split_val)) + # There might be "-" in the tag name. But we can be sure that the final + # two "-" are those inserted by the git describe command. 
+ commits_ahead_of_tag = split_val[-2] + abbrev_commit = split_val[-1] + val = bytes( + "-".join([git_tag_override, commits_ahead_of_tag, abbrev_commit])) return val if val else unknown_label except subprocess.CalledProcessError: return unknown_label -- GitLab From 5994156438a8d863dab04161589b34a3d0eb01d6 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 18 Apr 2018 11:26:40 -0700 Subject: [PATCH 040/434] Fix gen_git_version script not being able to find git binary. This error is happening on our Window's release builds. Making sure we add git binary to the PATH for Bazel. --- tensorflow/tools/ci_build/windows/bazel/common_env.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh index 7d4cc7ac30..0e6c0227b7 100644 --- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh +++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh @@ -44,6 +44,8 @@ export PYTHON_LIB_PATH="C:/${PYTHON_BASE_PATH}/lib/site-packages" # Add python into PATH, it's needed because gen_git_source.py uses # '/usr/bin/env python' as a shebang export PATH="/c/${PYTHON_BASE_PATH}:$PATH" +# Add git into PATH needed for gen_git_source.py +export PATH="/c/Program Files/Git/cmd:$PATH" # Make sure we have pip in PATH export PATH="/c/${PYTHON_BASE_PATH}/Scripts:$PATH" -- GitLab From 529c56d88f27337d6be263b6f61a2a7a1994bb2d Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Wed, 18 Apr 2018 15:33:39 -0700 Subject: [PATCH 041/434] Add --test_output=errors as default --- tensorflow/tools/ci_build/ci_parameterized_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index 9d23b508aa..797e0a6db5 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -237,7 
+237,7 @@ function get_cuda_capability_version() { CTYPE=${TF_BUILD_CONTAINER_TYPE} # Determine if the machine is a Mac -OPT_FLAG="" +OPT_FLAG="--test_output=errors" if [[ "$(uname -s)" == "Darwin" ]]; then DO_DOCKER=0 -- GitLab From 8cfbbafc17c8baaad47f2a12508c3bee9c8fcda4 Mon Sep 17 00:00:00 2001 From: fo40225 Date: Thu, 12 Apr 2018 09:41:48 +0800 Subject: [PATCH 042/434] fix tf.GIT_VERSION always 'unknown' on windows cmake build (#16730) --- .../contrib/cmake/tf_core_framework.cmake | 2 +- tensorflow/tools/git/gen_git_source.py | 37 +++++++++++++------ 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index 73cadc58ff..973c191c47 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -276,7 +276,7 @@ add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo) add_custom_command(OUTPUT ${VERSION_INFO_CC} COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py - ARGS --raw_generate ${VERSION_INFO_CC} --git_tag_override=${GIT_TAG_OVERRIDE} + ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE} DEPENDS __force_rebuild) set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py index 7f0f325119..2151a75e84 100755 --- a/tensorflow/tools/git/gen_git_source.py +++ b/tensorflow/tools/git/gen_git_source.py @@ -164,18 +164,14 @@ def get_git_version(git_base_path, git_tag_override): "git", str("--git-dir=%s/.git" % git_base_path), str("--work-tree=" + git_base_path), "describe", "--long", "--tags" ]).strip()) - if git_tag_override and val: + if git_tag_override: split_val = val.split("-") - if len(split_val) < 3: + if len(split_val) != 3: raise Exception( ("Expected git 
version in format 'TAG-COMMITS AFTER TAG-HASH' " "but got '%s'") % val) - # There might be "-" in the tag name. But we can be sure that the final - # two "-" are those inserted by the git describe command. - commits_ahead_of_tag = split_val[-2] - abbrev_commit = split_val[-1] - val = bytes( - "-".join([git_tag_override, commits_ahead_of_tag, abbrev_commit])) + split_val[0] = git_tag_override + val = bytes("-".join(split_val)) return val if val else unknown_label except subprocess.CalledProcessError: return unknown_label @@ -193,7 +189,15 @@ def write_version_info(filename, git_version): contents = """/* Generated by gen_git_source.py */ #include const char* tf_git_version() {return "%s";} -const char* tf_compiler_version() {return __VERSION__;} +const char* tf_compiler_version() { +#ifdef _MSC_VER +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + return "MSVC " TOSTRING(_MSC_FULL_VER); +#else + return __VERSION__; +#endif +} const int tf_cxx11_abi_flag() { #ifdef _GLIBCXX_USE_CXX11_ABI return _GLIBCXX_USE_CXX11_ABI; @@ -257,7 +261,7 @@ def generate(arglist, git_tag_override=None): write_version_info(dest_file, git_version) -def raw_generate(output_file, git_tag_override=None): +def raw_generate(output_file, source_dir, git_tag_override=None): """Simple generator used for cmake/make build systems. This does not create any symlinks. It requires the build system @@ -265,12 +269,13 @@ def raw_generate(output_file, git_tag_override=None): Args: output_file: Output filename for the version info cc + source_dir: Base path of the source code git_tag_override: Override the value for the git tag. This is useful for releases where we want to build the release before the git tag is created. 
""" - git_version = get_git_version(".", git_tag_override) + git_version = get_git_version(source_dir, git_tag_override) write_version_info(output_file, git_version) @@ -308,6 +313,11 @@ parser.add_argument( type=str, help="Generate version_info.cc (simpler version used for cmake/make)") +parser.add_argument( + "--source_dir", + type=str, + help="Base path of the source code (used for cmake/make)") + args = parser.parse_args() if args.configure is not None: @@ -317,7 +327,10 @@ if args.configure is not None: elif args.generate is not None: generate(args.generate, args.git_tag_override) elif args.raw_generate is not None: - raw_generate(args.raw_generate, args.git_tag_override) + source_path = "." + if args.source_dir is not None: + source_path = args.source_dir + raw_generate(args.raw_generate, source_path, args.git_tag_override) else: raise RuntimeError("--configure or --generate or --raw_generate " "must be used") -- GitLab From 558b3d35f080163b4f8cf8b4997d9e2cc0c4fd6e Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 18 Apr 2018 17:42:42 -0700 Subject: [PATCH 043/434] Fix merge. --- tensorflow/tools/git/gen_git_source.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py index 2151a75e84..6ec162e4a9 100755 --- a/tensorflow/tools/git/gen_git_source.py +++ b/tensorflow/tools/git/gen_git_source.py @@ -164,14 +164,17 @@ def get_git_version(git_base_path, git_tag_override): "git", str("--git-dir=%s/.git" % git_base_path), str("--work-tree=" + git_base_path), "describe", "--long", "--tags" ]).strip()) - if git_tag_override: + if git_tag_override and val: split_val = val.split("-") - if len(split_val) != 3: + if len(split_val) < 3: raise Exception( ("Expected git version in format 'TAG-COMMITS AFTER TAG-HASH' " "but got '%s'") % val) - split_val[0] = git_tag_override - val = bytes("-".join(split_val)) + # There might be "-" in the tag name. 
But we can be sure that the final + # two "-" are those inserted by the git describe command. + abbrev_commit = split_val[-1] + val = bytes( + "-".join([git_tag_override, "0", abbrev_commit])) return val if val else unknown_label except subprocess.CalledProcessError: return unknown_label @@ -189,15 +192,7 @@ def write_version_info(filename, git_version): contents = """/* Generated by gen_git_source.py */ #include const char* tf_git_version() {return "%s";} -const char* tf_compiler_version() { -#ifdef _MSC_VER -#define STRINGIFY(x) #x -#define TOSTRING(x) STRINGIFY(x) - return "MSVC " TOSTRING(_MSC_FULL_VER); -#else - return __VERSION__; -#endif -} +const char* tf_compiler_version() {return __VERSION__;} const int tf_cxx11_abi_flag() { #ifdef _GLIBCXX_USE_CXX11_ABI return _GLIBCXX_USE_CXX11_ABI; @@ -333,4 +328,4 @@ elif args.raw_generate is not None: raw_generate(args.raw_generate, source_path, args.git_tag_override) else: raise RuntimeError("--configure or --generate or --raw_generate " - "must be used") + "must be used") \ No newline at end of file -- GitLab From 0b3950d67bcb07c11f87bd3c2da554017bff0674 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Fri, 20 Apr 2018 00:35:54 +0800 Subject: [PATCH 044/434] Fix code block rendering in several api definitions --- tensorflow/core/api_def/base_api/api_def_Pad.pbtxt | 1 + tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/core/api_def/base_api/api_def_Pad.pbtxt b/tensorflow/core/api_def/base_api/api_def_Pad.pbtxt index e45e2375eb..ee4aad7899 100644 --- a/tensorflow/core/api_def/base_api/api_def_Pad.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Pad.pbtxt @@ -24,5 +24,6 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] [0, 0, 2, 2, 0, 0] [0, 0, 0, 0, 0, 0]] ``` + END } diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt index b9e75caf02..37ac10dddb 100644 --- 
a/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt @@ -44,6 +44,7 @@ In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) if T == qint8, out[i] -= (range(T) + 1) / 2.0 ``` + here `range(T) = numeric_limits::max() - numeric_limits::min()` *MIN_COMBINED Mode Example* @@ -87,6 +88,7 @@ choosing to elide the lowest possible value for symmetry (e.g., output range is We first find the range of values in our tensor. The range we use is always centered on 0, so we find m such that + ```c++ m = max(abs(input_min), abs(input_max)) ``` @@ -95,6 +97,7 @@ Our input tensor range is then `[-m, m]`. Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. If T is signed, this is + ``` num_bits = sizeof(T) * 8 [min_fixed, max_fixed] = @@ -102,16 +105,19 @@ If T is signed, this is ``` Otherwise, if T is unsigned, the fixed-point range is + ``` [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] ``` From this we compute our scaling factor, s: + ```c++ s = (max_fixed - min_fixed) / (2 * m) ``` Now we can quantize the elements of our tensor: + ```c++ result = round(input * s) ``` -- GitLab From c3f5d8c53295d9740c622f5221464c23559747ad Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Thu, 19 Apr 2018 16:02:09 -0700 Subject: [PATCH 045/434] Update install_python3.5_pip_packages.sh --- .../tools/ci_build/install/install_python3.5_pip_packages.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index aefc49f604..204a82f647 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -39,6 +39,9 @@ if [[ -z $pip35_version ]]; then fi 
set -e +pip3.5 install --upgrade setuptools +pip3.5 install --upgrade pip + pip3.5 install --upgrade virtualenv # Install six. -- GitLab From 8723770b4cbcac0a528354d8508a5ef83716d1fa Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 19 Apr 2018 19:27:34 -0700 Subject: [PATCH 046/434] [XLA] Remove default argument on virtual function DeviceMemoryAllocator::Allocate(). Default args on virtual functions are disallowed by the Google style guide, for good reason. They have the extremely surprising behavior that the defaults you get when calling a function on a pointer depend not on the underlying type of the object, but on whatever is the semantic type of the pointer! PiperOrigin-RevId: 193611213 --- .../xla/service/device_memory_allocator.h | 30 ++++++++++++++----- .../xla/tests/local_client_test_base.cc | 3 +- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/device_memory_allocator.h b/tensorflow/compiler/xla/service/device_memory_allocator.h index 240acf8973..da45c4d45a 100644 --- a/tensorflow/compiler/xla/service/device_memory_allocator.h +++ b/tensorflow/compiler/xla/service/device_memory_allocator.h @@ -38,13 +38,25 @@ class DeviceMemoryAllocator { virtual ~DeviceMemoryAllocator() {} // 'retry_on_failure': If false, and the first attempt to allocate the memory - // fails, the allocation should return immediately without retrying. - // An example use case is optional scratch spaces where a failure - // has only performance impact. + // fails, the allocation should return immediately without retrying. An + // example use case is optional scratch spaces where a failure has only + // performance impact. + // // Allocate() should return a null pointer for a size-0 allocation. // Deallocate() must be a no-op for null pointers. 
- virtual StatusOr Allocate( - int device_ordinal, uint64 size, bool retry_on_failure = true) = 0; + virtual StatusOr Allocate(int device_ordinal, + uint64 size, + bool retry_on_failure) = 0; + + // Two-arg version of Allocate(), which sets retry-on-failure to true. + // + // (We don't simply use a default argument on the virtual Allocate function + // because default args on virtual functions are disallowed by the Google + // style guide.) + StatusOr Allocate(int device_ordinal, uint64 size) { + return Allocate(device_ordinal, size, /*retry_on_failure=*/true); + } + virtual tensorflow::Status Deallocate(int device_ordinal, se::DeviceMemoryBase* mem) = 0; @@ -67,8 +79,12 @@ class StreamExecutorMemoryAllocator : public DeviceMemoryAllocator { const se::Platform* platform, tensorflow::gtl::ArraySlice stream_executors); - StatusOr Allocate( - int device_ordinal, uint64 size, bool retry_on_failure = true) override; + StatusOr Allocate(int device_ordinal, uint64 size, + bool retry_on_failure) override; + + // Pull in two-arg overload that sets retry_on_failure to true. 
+ using DeviceMemoryAllocator::Allocate; + tensorflow::Status Deallocate(int device_ordinal, se::DeviceMemoryBase* mem) override; diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc index c60ba2422f..bb5aabb214 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.cc +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -44,7 +44,8 @@ StatusOr TestAllocator::Allocate(int device_ordinal, allocation_count_++; device_allocation_count_[device_ordinal]++; } - return StreamExecutorMemoryAllocator::Allocate(device_ordinal, size); + return StreamExecutorMemoryAllocator::Allocate(device_ordinal, size, + retry_on_failure); } tensorflow::Status TestAllocator::Deallocate(int device_ordinal, -- GitLab From 2a956c9b8f9950405b481ccc0e05636873ecc9ae Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 20 Apr 2018 02:40:37 +0000 Subject: [PATCH 047/434] Support string tensors for tf.count_nonzero This fix tries to address the issue raised in 18712 where `tf.count_nonzero` does not support string tensors. The implementation of `tf.count_nonzero` relies on `tf.not_equal` which actually support string tensors. The reason the string tensor does not work is because `tf.count_nonzero` created a numpy type `zero` which uses `input_tensor.dtype.as_numpy_dtype()`. The numpy type `zero` is then passed to `tf.not_equal (which converts numpy `zero` into a tensor zero). However, `input_tensor.dtype.as_numpy_dtype()` will converts tf.string to numpy.object thus the exception. But that is not necessary as `zero` could be created with `tf.zeros` directly without back and forth conversion to numpy. This fix fixes the issue. This fix fixes 18712. 
Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 781b1c557f..8c9ad66b0e 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1487,7 +1487,8 @@ def count_nonzero(input_tensor, with ops.name_scope(name, "count_nonzero", [input_tensor]): input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor") - zero = input_tensor.dtype.as_numpy_dtype() + # A scalar of 'zero' is enough as `not_equal` will broadcast. + zero = array_ops.zeros([], dtype=input_tensor.dtype) return cast( reduce_sum( # int64 reduction happens on GPU -- GitLab From 37999ce500f27d587100f0bf45e87957936f5ada Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 20 Apr 2018 02:48:15 +0000 Subject: [PATCH 048/434] Add test case for tf.string support with tf.count_nonzero Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/reduction_ops_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py index 589ea54973..0be89e1ff4 100644 --- a/tensorflow/python/kernel_tests/reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/reduction_ops_test.py @@ -958,6 +958,12 @@ class CountNonzeroReductionTest(test.TestCase): y = math_ops.count_nonzero(x, [0]) self.assertAllEqual(y.eval(), np.zeros(9938)) + def testStringReduce(self): + # Test case for GitHub issue 18712 + with self.test_session() as sess: + v = math_ops.count_nonzero(constant_op.constant(["test"])) + self.assertAllClose(sess.run(v), 1) + if __name__ == "__main__": test.main() -- GitLab From 7358025743951b42fe0f99fb85b4418769de5357 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 20 Apr 2018 02:51:54 +0000 Subject: [PATCH 049/434] Add test cases with axis and keepdims for tf.count_nonzero and string Signed-off-by: Yong Tang --- 
.../python/kernel_tests/reduction_ops_test.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py index 0be89e1ff4..943b80b787 100644 --- a/tensorflow/python/kernel_tests/reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/reduction_ops_test.py @@ -889,9 +889,9 @@ class AnyReductionTest(test.TestCase): class CountNonzeroReductionTest(test.TestCase): - def _compare(self, x, reduction_axes, keepdims, use_gpu=False, + def _compare(self, x, reduction_axes, keepdims, use_gpu=False, zero=0, feed_dict=None): - np_ans = (x != 0).astype(np.int32) + np_ans = (x != zero).astype(np.int32) if reduction_axes is None: np_ans = np.sum(np_ans, keepdims=keepdims) else: @@ -964,6 +964,15 @@ class CountNonzeroReductionTest(test.TestCase): v = math_ops.count_nonzero(constant_op.constant(["test"])) self.assertAllClose(sess.run(v), 1) + def testStringReduce1D(self): + # Create a 1D array of strings + x = np.asarray(["", "", "a", "", "", "b"]) + self._compare(x, None, keepdims=False, zero=np.str("")) + self._compare(x, [], keepdims=False, zero=np.str("")) + self._compare(x, [0], keepdims=False, zero=np.str("")) + self._compare(x, None, keepdims=True, zero=np.str("")) + self._compare(x, [], keepdims=True, zero=np.str("")) + self._compare(x, [0], keepdims=True, zero=np.str("")) if __name__ == "__main__": test.main() -- GitLab From 01ab85f0fdce13f98b705c54901284a165ed7bd8 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 20 Apr 2018 02:53:57 +0000 Subject: [PATCH 050/434] Add n-D test cases for better coverage Signed-off-by: Yong Tang --- .../python/kernel_tests/reduction_ops_test.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py index 943b80b787..ea78b58d88 100644 --- 
a/tensorflow/python/kernel_tests/reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/reduction_ops_test.py @@ -974,5 +974,21 @@ class CountNonzeroReductionTest(test.TestCase): self._compare(x, [], keepdims=True, zero=np.str("")) self._compare(x, [0], keepdims=True, zero=np.str("")) + def testStringReduce2D(self): + # Create a 2D array of strings + x = np.asarray([["", "", "a", "", "", "b"], + ["", "c", "", "d", "", ""], + ["e", "", "f", "", "", ""]]) + self._compare(x, None, keepdims=False, zero=np.str("")) + self._compare(x, [], keepdims=False, zero=np.str("")) + self._compare(x, [0], keepdims=False, zero=np.str("")) + self._compare(x, [1], keepdims=False, zero=np.str("")) + self._compare(x, [0, 1], keepdims=False, zero=np.str("")) + self._compare(x, None, keepdims=True, zero=np.str("")) + self._compare(x, [], keepdims=True, zero=np.str("")) + self._compare(x, [0], keepdims=True, zero=np.str("")) + self._compare(x, [0, 1], keepdims=True, zero=np.str("")) + + if __name__ == "__main__": test.main() -- GitLab From 4ef9de422d452683ac661d3a6313aeb2972b836d Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 19 Apr 2018 20:00:21 -0700 Subject: [PATCH 051/434] Always include the local worker in the list of filtered targets. It is currently legal to specify a device filter that doesn't include the local worker. In that case, the MasterSession includes all local devices regardless of the filter. This change extends this behavior to the list of filtered workers, which will be crucial for backwards compatibility when we enable CreateWorkerSession for all MasterSessions, because we need to call CreateWorkerSession on all potential workers. 
PiperOrigin-RevId: 193613313 --- tensorflow/core/distributed_runtime/master.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/distributed_runtime/master.cc b/tensorflow/core/distributed_runtime/master.cc index 288656e7f8..e60386fd34 100644 --- a/tensorflow/core/distributed_runtime/master.cc +++ b/tensorflow/core/distributed_runtime/master.cc @@ -167,13 +167,16 @@ class DeviceFinder { } // Enumerates all known workers' target. A target name is a // prefix of a device name. E.g., /job:mnist/replica:0/task:10. + CHECK_GT(env_->local_devices.size(), 0) << "No local devices provided."; + const string& local_device_name = env_->local_devices[0]->name(); std::vector workers; worker_cache->ListWorkers(&workers); if (filters_.empty()) { std::swap(workers, targets_); } else { for (const string& name : workers) { - if (MatchFilters(name)) { + if (MatchFilters(name) || + DeviceNameUtils::IsSameAddressSpace(name, local_device_name)) { targets_.push_back(name); } } -- GitLab From 7f3baa210a45cd0b41e21b63c2be6dd54230ea0b Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 20 Apr 2018 02:55:31 +0000 Subject: [PATCH 052/434] Update doc string for tf.count_nonzero to add string type Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 8c9ad66b0e..31ce83905b 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1467,7 +1467,8 @@ def count_nonzero(input_tensor, ``` Args: - input_tensor: The tensor to reduce. Should be of numeric type, or `bool`. + input_tensor: The tensor to reduce. Should be of numeric type, `string`, + or `bool`. axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. 
-- GitLab From 2273c4e56334caf31de01c6b6f8f4edd48432972 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 19 Apr 2018 21:33:41 -0700 Subject: [PATCH 053/434] Skip tests with no_oss tag in XLA builds. PiperOrigin-RevId: 193619344 --- tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh index a94a627dfb..a410c10b61 100755 --- a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh +++ b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh @@ -35,7 +35,7 @@ echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc bazel clean # Run bazel test command. Double test timeouts to avoid flakes. -bazel test --config=cuda --test_tag_filters=-no_gpu,-benchmark-test -k \ +bazel test --config=cuda --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ --build_tests_only --test_output=errors --local_test_jobs=8 \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ -- GitLab From 06bb3364795e443206910c98cee132d719cf41e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Fri, 20 Apr 2018 13:33:05 +0800 Subject: [PATCH 054/434] TST: byte string for python3 --- .../python/kernel_tests/scatter_nd_ops_test.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index dfe9600dbb..b7477a768a 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -365,31 +365,35 @@ class ScatterNdTest(test.TestCase): return array_ops.scatter_nd(indices, updates, shape) def testString(self): - indices = constant_op.constant([[4], [3], [1], [7]], dtype=dtypes.int32) + indices = 
constant_op.constant([[4], [3], [1], [7]], + dtype=dtypes.int32) updates = constant_op.constant(["four", "three", "one", "seven"], dtype=dtypes.string) - expected = np.array(["", "one", "", "three", "four", "", "", "seven"]) + expected = np.array([b"", b"one", b"", b"three", b"four", + b"", b"", b"seven"]) scatter = self.scatter_nd(indices, updates, shape=(8,)) with self.test_session() as sess: result = sess.run(scatter) self.assertAllEqual(expected, result) # Same indice is updated twice by same value. - indices = constant_op.constant([[4], [3], [3], [7]], dtype=dtypes.int32) + indices = constant_op.constant([[4], [3], [3], [7]], + dtype=dtypes.int32) updates = constant_op.constant(["a", "b", "b", "c"], dtype=dtypes.string) - expected = np.array(["", "", "", "bb", "a", "", "", "c"]) + expected = np.array([b"", b"", b"", b"bb", b"a", b"", b"", b"c"]) scatter = self.scatter_nd(indices, updates, shape=(8,)) with self.test_session() as sess: result = sess.run(scatter) self.assertAllEqual(expected, result) # Same indice is updated twice by different value. - indices = constant_op.constant([[4], [3], [3], [7]], dtype=dtypes.int32) + indices = constant_op.constant([[4], [3], [3], [7]], + dtype=dtypes.int32) updates = constant_op.constant(["a", "b", "c", "d"], dtype=dtypes.string) - expected = [np.array(["", "", "", "bc", "a", "", "", "d"]), - np.array(["", "", "", "cb", "a", "", "", "d"])] + expected = [np.array([b"", b"", b"", b"bc", b"a", b"", b"", b"d"]), + np.array([b"", b"", b"", b"cb", b"a", b"", b"", b"d"])] scatter = self.scatter_nd(indices, updates, shape=(8,)) with self.test_session() as sess: result = sess.run(scatter) -- GitLab From 70b8d21edcc84818835c9e2940a5df288c309d45 Mon Sep 17 00:00:00 2001 From: Roy Frostig Date: Thu, 19 Apr 2018 23:01:07 -0700 Subject: [PATCH 055/434] [XLA] Rework the local XLA client's Shape class with separate array and tuple shape constructors. 
PiperOrigin-RevId: 193624591 --- .../compiler/xla/python/numpy_bridge.cc | 20 +-- tensorflow/compiler/xla/python/xla_client.py | 137 ++++++++++++------ .../compiler/xla/python/xla_client_test.py | 10 +- 3 files changed, 103 insertions(+), 64 deletions(-) diff --git a/tensorflow/compiler/xla/python/numpy_bridge.cc b/tensorflow/compiler/xla/python/numpy_bridge.cc index eec48479c9..dc6f5fe5fc 100644 --- a/tensorflow/compiler/xla/python/numpy_bridge.cc +++ b/tensorflow/compiler/xla/python/numpy_bridge.cc @@ -181,16 +181,6 @@ StatusOr XlaShapeFromPyShape(PyObject* o) { PyObjectCppRepr(o).c_str()); }; - auto get_attr = [o, &error](const string& field) -> StatusOr { - PyObject* result = - PyObject_GetAttrString(o, const_cast(field.c_str())); - if (result == nullptr) { - return error(tensorflow::strings::StrCat( - "Failed to get attribute of Shape object:", field)); - } - return result; - }; - auto call_method = [o, &error](const string& method) -> StatusOr { PyObject* result = PyObject_CallMethod(o, const_cast(method.c_str()), nullptr); @@ -202,12 +192,16 @@ StatusOr XlaShapeFromPyShape(PyObject* o) { }; PyObject* np_type; - TF_ASSIGN_OR_RETURN(np_type, get_attr("np_dtype")); + TF_ASSIGN_OR_RETURN(np_type, call_method("numpy_dtype")); if (np_type->ob_type != &PyArrayDescr_Type) { - return error("Shape attribute np_dtype is not an integer numpy dtype"); + return error( + "Return value of shape method numpy_dtype " + "is not an integer numpy dtype"); } if (!NumpyTypeIsValid(NumpyTypenum(np_type))) { - return error("Shape attribute np_dtype is not a valid integer numpy dtype"); + return error( + "Return value of shape method numpy_dtype " + "is not a valid integer numpy dtype"); } const PrimitiveType element_type = NumpyTypeToPrimitiveType(NumpyTypenum(np_type)); diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 9c81f6439d..f6809b6b87 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ 
b/tensorflow/compiler/xla/python/xla_client.py @@ -166,14 +166,14 @@ class LocalBuffer(object): self._delete = c_api.DeleteLocalShapedBuffer @staticmethod - def from_py(npval, layout_fn=None): - npval = require_numpy_array_layout(npval) + def from_pyval(pyval, layout_fn=None): + pyval = require_numpy_array_layout(pyval) if layout_fn: - shape = Shape.from_numpy(npval) + shape = Shape.from_pyval(pyval) shape = shape.map_leaves(layout_fn) else: shape = None - return LocalBuffer(c_api.LocalShapedBuffer.FromLiteral(npval, shape)) + return LocalBuffer(c_api.LocalShapedBuffer.FromLiteral(pyval, shape)) def to_py(self): return self.c_local_shaped_buffer.ToLiteral() @@ -191,53 +191,104 @@ class LocalBuffer(object): class Shape(object): - """XLA shape. + """Represents an XLA shape. - Represents an XLA shape by a corresponding Python/Numpy type and a - list of dimensions, which are themselves Shapes in case this one - represents an XLA tuple. + A shape is either an array shape, having rank-many integer + dimensions and an element type (represented by a Numpy dtype), or it + is a tuple shape, having a shape for every tuple component: + + type shape = + TupleShape of shape list + | ArrayShape of { dimensions: int list; element_type: dtype } + + Callers are expected to instantiate this class only via the static + constructors: tuple_shape, array_shape, and from_pyval. 
""" - def __init__(self, np_dtype, dimensions, minor_to_major=None): + @staticmethod + def tuple_shape(tuple_shapes): + """Construct a tuple shape.""" + if (not isinstance(tuple_shapes, (tuple, list)) or + not all(isinstance(t, Shape) for t in tuple_shapes)): + raise TypeError('tuple_shapes must be a tuple of Shapes') + return Shape(tuple_shapes, tuple) + + @staticmethod + def array_shape(element_type, dimensions, minor_to_major=None): + """Construct an array shape.""" + if (not isinstance(dimensions, tuple) or + not all(isinstance(i, int) for i in dimensions)): + dimensions = tuple(int(i) for i in dimensions) + return Shape(dimensions, np.dtype(element_type), + minor_to_major=minor_to_major) + + @staticmethod + def from_pyval(pyval): + def convert(pyval): + if isinstance(pyval, tuple): + return Shape.tuple_shape(tuple(convert(elt) for elt in pyval)) + else: + pyval = require_numpy_array_layout(pyval) + return Shape.array_shape(pyval.dtype, np.shape(pyval)) + return convert(pyval) + + def __init__(self, dimensions, dtype, minor_to_major=None): assert isinstance(dimensions, tuple) - self.np_dtype = np_dtype self._dimensions = dimensions + self._dtype = dtype + self._is_tuple = dtype == tuple self._minor_to_major = minor_to_major self._check_minor_to_major() def __eq__(self, other): # pylint: disable=protected-access - return (self.np_dtype == other.np_dtype and + return (self._dtype == other._dtype and self._dimensions == other._dimensions and self._minor_to_major == other._minor_to_major) def __repr__(self): - return ('xla_client.Shape(np_dtype={!r}, dimensions={!r}, ' - 'minor_to_major={!r})').format(self.np_dtype, self._dimensions, - self._minor_to_major) - - def element_type(self): - return DTYPE_TO_XLA_ELEMENT_TYPE[str(self.np_dtype)] + return ('xla_client.Shape(_dtype={!r}, _dimensions={!r}, ' + '_is_tuple={!r}), _minor_to_major={!r}').format( + self._dtype, self._dimensions, self._is_tuple, + self._minor_to_major) def is_tuple(self): - return 
self.element_type() == xla_data_pb2.TUPLE + return self._is_tuple - def dimensions(self): - if self.is_tuple(): - raise ValueError('Tuple shape has no dimensions') - return self._dimensions - - def minor_to_major(self): - return self._minor_to_major + def is_array(self): + return not self._is_tuple def tuple_shapes(self): if not self.is_tuple(): - raise ValueError('Shape is not a tuple shape') + raise ValueError('not a tuple shape') + return self._dimensions + + def numpy_dtype(self): + """Like element_type(), but returns dtype('O') in case of a tuple shape.""" + if self.is_tuple(): + return np.dtype(np.object) + else: + return self.element_type() + + def xla_element_type(self): + return DTYPE_TO_XLA_ELEMENT_TYPE[str(self.numpy_dtype())] + + def element_type(self): + if not self.is_array(): + raise ValueError('not an array shape') + return self._dtype + + def dimensions(self): + if not self.is_array(): + raise ValueError('not an array shape') return self._dimensions def rank(self): return len(self.dimensions()) + def minor_to_major(self): + return self._minor_to_major + def map_leaves(self, f): """Map f over each leaf-level array subshape. 
@@ -250,7 +301,7 @@ class Shape(object): """ if self.is_tuple(): children = tuple(child.map_leaves(f) for child in self.tuple_shapes()) - return Shape(np.dtype('O'), children) + return Shape.tuple_shape(children) else: mapped = f(self) return self if mapped is None else mapped @@ -264,30 +315,24 @@ class Shape(object): assert sorted(mtm) == range(len(mtm)), self def update_minor_to_major(self, minor_to_major): + if not self.is_array(): + raise ValueError('not an array shape') if not isinstance(minor_to_major, tuple): raise TypeError('minor_to_major must be a tuple') - updated = Shape(self.np_dtype, tuple(self.dimensions()), minor_to_major) + updated = Shape.array_shape( + self.element_type(), self.dimensions(), minor_to_major) updated._check_minor_to_major() # pylint: disable=protected-access return updated - @staticmethod - def from_numpy(npval): - - def convert(npval): - if isinstance(npval, tuple): - return Shape(np.dtype('O'), tuple(convert(elt) for elt in npval)) - else: - return Shape(npval.dtype, np.shape(npval)) - - return convert(require_numpy_array_layout(npval)) - def _wrap_shape(shape_info): dtype, dims = shape_info element_type = DTYPE_TO_XLA_ELEMENT_TYPE[str(dtype)] if element_type == xla_data_pb2.TUPLE: - dims = tuple(_wrap_shape(subshape_info) for subshape_info in dims) - return Shape(dtype, dims) + shapes = tuple(_wrap_shape(subshape_info) for subshape_info in dims) + return Shape.tuple_shape(shapes) + else: + return Shape.array_shape(dtype, dims) def _wrap_data_handle(handle): @@ -420,7 +465,7 @@ class LocalComputation(object): compile_options=None, layout_fn=None): return self.Compile( - argument_shapes=[Shape.from_numpy(arg) for arg in arguments], + argument_shapes=[Shape.from_pyval(arg) for arg in arguments], compile_options=compile_options, layout_fn=layout_fn) @@ -428,7 +473,7 @@ class LocalComputation(object): """Execute with Python values as arguments and return value.""" if not self.is_compiled: raise ValueError('Cannot execute an 
uncompiled local XLA computation.') - argument_shapes = [Shape.from_numpy(arg) for arg in arguments] + argument_shapes = [Shape.from_pyval(arg) for arg in arguments] if layout_fn: argument_shapes = [ shape.map_leaves(layout_fn) for shape in argument_shapes @@ -607,7 +652,7 @@ class ComputationBuilder(object): A ComputationDataHandle message. """ return self.ParameterWithShape( - Shape.from_numpy(value), name=name, parameter_num=parameter_num) + Shape.from_pyval(value), name=name, parameter_num=parameter_num) def Broadcast(self, operand, sizes): """Enqueues a broadcast operation onto the computation. @@ -968,7 +1013,7 @@ class ComputationBuilder(object): Returns: a ComputationDataHandle to the generated array of F32 values. """ - shape = Shape(self.GetShape(mu).np_dtype, dims) + shape = Shape.array_shape(self.GetShape(mu).element_type(), dims) return _wrap_data_handle( self._client.RngNormal( _unwrap_data_handle(mu), _unwrap_data_handle(sigma), shape)) @@ -988,7 +1033,7 @@ class ComputationBuilder(object): Returns: a ComputationDataHandle to the generated array of values with the same numeric type (F32, S32, or U32) as the arguments a and b. 
""" - shape = Shape(self.GetShape(a).np_dtype, dims) + shape = Shape.array_shape(self.GetShape(a).element_type(), dims) return _wrap_data_handle( self._client.RngUniform( _unwrap_data_handle(a), _unwrap_data_handle(b), shape)) diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py index d97264ea64..6fe7b242e4 100644 --- a/tensorflow/compiler/xla/python/xla_client_test.py +++ b/tensorflow/compiler/xla/python/xla_client_test.py @@ -319,7 +319,7 @@ class LocalBufferTest(LocalComputationTest): def _Execute(self, c, arguments): compiled_c = c.Build().CompileWithExampleArguments(arguments) - arg_buffers = [xla_client.LocalBuffer.from_py(arg) for arg in arguments] + arg_buffers = [xla_client.LocalBuffer.from_pyval(arg) for arg in arguments] result_buffer = compiled_c.ExecuteWithLocalBuffers(arg_buffers) return result_buffer.to_py() @@ -350,7 +350,7 @@ class LocalBufferTest(LocalComputationTest): c.Add(c.ParameterFromNumpy(NumpyArrayF32(0.)), c.ConstantF32Scalar(3.14)) arg = NumpyArrayF32(1.11) compiled_c = c.Build().CompileWithExampleArguments([arg]) - arg_buffer = xla_client.LocalBuffer.from_py(arg) + arg_buffer = xla_client.LocalBuffer.from_pyval(arg) arg_buffer.delete() with self.assertRaises(ValueError): compiled_c.ExecuteWithLocalBuffers([arg_buffer]) @@ -1288,7 +1288,7 @@ class EmbeddedComputationsTest(LocalComputationTest): def testInfeedS32Values(self): to_infeed = NumpyArrayS32([1, 2, 3, 4]) c = self._NewComputation() - c.Infeed(xla_client.Shape.from_numpy(to_infeed[0])) + c.Infeed(xla_client.Shape.from_pyval(to_infeed[0])) compiled_c = c.Build().CompileWithExampleArguments() for item in to_infeed: xla_client.transfer_to_infeed(item) @@ -1300,7 +1300,7 @@ class EmbeddedComputationsTest(LocalComputationTest): def testInfeedThenOutfeedS32(self): to_round_trip = NumpyArrayS32([1, 2, 3, 4]) c = self._NewComputation() - x = c.Infeed(xla_client.Shape.from_numpy(to_round_trip[0])) + x = 
c.Infeed(xla_client.Shape.from_pyval(to_round_trip[0])) c.Outfeed(x) compiled_c = c.Build().CompileWithExampleArguments() @@ -1310,7 +1310,7 @@ class EmbeddedComputationsTest(LocalComputationTest): execution.start() xla_client.transfer_to_infeed(want) got = xla_client.transfer_from_outfeed( - xla_client.Shape.from_numpy(to_round_trip[0])) + xla_client.Shape.from_pyval(to_round_trip[0])) execution.join() self.assertEqual(want, got) -- GitLab From f7e8fbb28a0fa4e979a94d7b458706abf48f7deb Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 19 Apr 2018 23:08:53 -0700 Subject: [PATCH 056/434] Automated g4 rollback of changelist 193602050 PiperOrigin-RevId: 193625346 --- tensorflow/core/lib/io/record_reader.cc | 147 ++++---------- tensorflow/core/lib/io/record_reader.h | 16 +- tensorflow/core/lib/io/recordio_test.cc | 212 ++++++++++++++------- tensorflow/core/lib/io/zlib_inputstream.cc | 16 +- tensorflow/core/lib/io/zlib_inputstream.h | 19 +- 5 files changed, 220 insertions(+), 190 deletions(-) diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index 6de850bb20..c24628be57 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ b/tensorflow/core/lib/io/record_reader.cc @@ -56,110 +56,55 @@ RecordReaderOptions RecordReaderOptions::CreateRecordReaderOptions( RecordReader::RecordReader(RandomAccessFile* file, const RecordReaderOptions& options) - : src_(file), options_(options) { + : options_(options), + input_stream_(new RandomAccessInputStream(file)), + last_read_failed_(false) { if (options.buffer_size > 0) { - input_stream_.reset(new BufferedInputStream(file, options.buffer_size)); - } else { - input_stream_.reset(new RandomAccessInputStream(file)); + input_stream_.reset(new BufferedInputStream(input_stream_.release(), + options.buffer_size, true)); } if (options.compression_type == RecordReaderOptions::ZLIB_COMPRESSION) { // We don't have zlib available on all embedded platforms, so fail. 
#if defined(IS_SLIM_BUILD) LOG(FATAL) << "Zlib compression is unsupported on mobile platforms."; #else // IS_SLIM_BUILD - zlib_input_stream_.reset(new ZlibInputStream( - input_stream_.get(), options.zlib_options.input_buffer_size, - options.zlib_options.output_buffer_size, options.zlib_options)); + input_stream_.reset(new ZlibInputStream( + input_stream_.release(), options.zlib_options.input_buffer_size, + options.zlib_options.output_buffer_size, options.zlib_options, true)); #endif // IS_SLIM_BUILD } else if (options.compression_type == RecordReaderOptions::NONE) { // Nothing to do. } else { - LOG(FATAL) << "Unspecified compression type :" << options.compression_type; + LOG(FATAL) << "Unrecognized compression type :" << options.compression_type; } } // Read n+4 bytes from file, verify that checksum of first n bytes is // stored in the last 4 bytes and store the first n bytes in *result. -// May use *storage as backing store. -Status RecordReader::ReadChecksummed(uint64 offset, size_t n, - StringPiece* result, string* storage) { +// +// offset corresponds to the user-provided value to ReadRecord() +// and is used only in error messages. +Status RecordReader::ReadChecksummed(uint64 offset, size_t n, string* result) { if (n >= SIZE_MAX - sizeof(uint32)) { return errors::DataLoss("record size too large"); } const size_t expected = n + sizeof(uint32); - storage->resize(expected); - -#if !defined(IS_SLIM_BUILD) - if (zlib_input_stream_) { - // If we have a zlib compressed buffer, we assume that the - // file is being read sequentially, and we use the underlying - // implementation to read the data. - // - // No checks are done to validate that the file is being read - // sequentially. At some point the zlib input buffer may support - // seeking, possibly inefficiently. 
- TF_RETURN_IF_ERROR(zlib_input_stream_->ReadNBytes(expected, storage)); - - if (storage->size() != expected) { - if (storage->empty()) { - return errors::OutOfRange("eof"); - } else { - return errors::DataLoss("truncated record at ", offset); - } - } + TF_RETURN_IF_ERROR(input_stream_->ReadNBytes(expected, result)); - uint32 masked_crc = core::DecodeFixed32(storage->data() + n); - if (crc32c::Unmask(masked_crc) != crc32c::Value(storage->data(), n)) { - return errors::DataLoss("corrupted record at ", offset); - } - *result = StringPiece(storage->data(), n); - } else { -#endif // IS_SLIM_BUILD - if (options_.buffer_size > 0) { - // If we have a buffer, we assume that the file is being read - // sequentially, and we use the underlying implementation to read the - // data. - // - // No checks are done to validate that the file is being read - // sequentially. - TF_RETURN_IF_ERROR(input_stream_->ReadNBytes(expected, storage)); - - if (storage->size() != expected) { - if (storage->empty()) { - return errors::OutOfRange("eof"); - } else { - return errors::DataLoss("truncated record at ", offset); - } - } - - const uint32 masked_crc = core::DecodeFixed32(storage->data() + n); - if (crc32c::Unmask(masked_crc) != crc32c::Value(storage->data(), n)) { - return errors::DataLoss("corrupted record at ", offset); - } - *result = StringPiece(storage->data(), n); + if (result->size() != expected) { + if (result->empty()) { + return errors::OutOfRange("eof"); } else { - // This version supports reading from arbitrary offsets - // since we are accessing the random access file directly. 
- StringPiece data; - TF_RETURN_IF_ERROR(src_->Read(offset, expected, &data, &(*storage)[0])); - if (data.size() != expected) { - if (data.empty()) { - return errors::OutOfRange("eof"); - } else { - return errors::DataLoss("truncated record at ", offset); - } - } - const uint32 masked_crc = core::DecodeFixed32(data.data() + n); - if (crc32c::Unmask(masked_crc) != crc32c::Value(data.data(), n)) { - return errors::DataLoss("corrupted record at ", offset); - } - *result = StringPiece(data.data(), n); + return errors::DataLoss("truncated record at ", offset); } -#if !defined(IS_SLIM_BUILD) } -#endif // IS_SLIM_BUILD + const uint32 masked_crc = core::DecodeFixed32(result->data() + n); + if (crc32c::Unmask(masked_crc) != crc32c::Value(result->data(), n)) { + return errors::DataLoss("corrupted record at ", offset); + } + result->resize(n); return Status::OK(); } @@ -167,50 +112,42 @@ Status RecordReader::ReadRecord(uint64* offset, string* record) { static const size_t kHeaderSize = sizeof(uint64) + sizeof(uint32); static const size_t kFooterSize = sizeof(uint32); + // Position the input stream. + int64 curr_pos = input_stream_->Tell(); + int64 desired_pos = static_cast(*offset); + if (curr_pos > desired_pos || curr_pos < 0 /* EOF */ || + (curr_pos == desired_pos && last_read_failed_)) { + last_read_failed_ = false; + TF_RETURN_IF_ERROR(input_stream_->Reset()); + TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(desired_pos)); + } else if (curr_pos < desired_pos) { + TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(desired_pos - curr_pos)); + } + DCHECK_EQ(desired_pos, input_stream_->Tell()); + // Read header data. 
- StringPiece lbuf; - Status s = ReadChecksummed(*offset, sizeof(uint64), &lbuf, record); + Status s = ReadChecksummed(*offset, sizeof(uint64), record); if (!s.ok()) { + last_read_failed_ = true; return s; } - const uint64 length = core::DecodeFixed64(lbuf.data()); + const uint64 length = core::DecodeFixed64(record->data()); // Read data - StringPiece data; - s = ReadChecksummed(*offset + kHeaderSize, length, &data, record); + s = ReadChecksummed(*offset + kHeaderSize, length, record); if (!s.ok()) { + last_read_failed_ = true; if (errors::IsOutOfRange(s)) { s = errors::DataLoss("truncated record at ", *offset); } return s; } - if (record->data() != data.data()) { - // RandomAccessFile placed the data in some other location. - memmove(&(*record)[0], data.data(), data.size()); - } - - record->resize(data.size()); - *offset += kHeaderSize + length + kFooterSize; + DCHECK_EQ(*offset, input_stream_->Tell()); return Status::OK(); } -Status RecordReader::SkipNBytes(uint64 offset) { -#if !defined(IS_SLIM_BUILD) - if (zlib_input_stream_) { - TF_RETURN_IF_ERROR(zlib_input_stream_->SkipNBytes(offset)); - } else { -#endif - if (options_.buffer_size > 0) { - TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(offset)); - } -#if !defined(IS_SLIM_BUILD) - } -#endif - return Status::OK(); -} // namespace io - SequentialRecordReader::SequentialRecordReader( RandomAccessFile* file, const RecordReaderOptions& options) : underlying_(file, options), offset_(0) {} diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h index 26278e0328..f6d587dfa0 100644 --- a/tensorflow/core/lib/io/record_reader.h +++ b/tensorflow/core/lib/io/record_reader.h @@ -69,25 +69,14 @@ class RecordReader { // Read the record at "*offset" into *record and update *offset to // point to the offset of the next record. Returns OK on success, // OUT_OF_RANGE for end of file, or something else for an error. 
- // - // Note: if buffering is used (with or without compression), access must be - // sequential. Status ReadRecord(uint64* offset, string* record); - // Skip the records till "offset". Returns OK on success, - // OUT_OF_RANGE for end of file, or something else for an error. - Status SkipNBytes(uint64 offset); - private: - Status ReadChecksummed(uint64 offset, size_t n, StringPiece* result, - string* storage); + Status ReadChecksummed(uint64 offset, size_t n, string* result); - RandomAccessFile* src_; RecordReaderOptions options_; std::unique_ptr input_stream_; -#if !defined(IS_SLIM_BUILD) - std::unique_ptr zlib_input_stream_; -#endif // IS_SLIM_BUILD + bool last_read_failed_; TF_DISALLOW_COPY_AND_ASSIGN(RecordReader); }; @@ -121,7 +110,6 @@ class SequentialRecordReader { return errors::InvalidArgument( "Trying to seek offset: ", offset, " which is less than the current offset: ", offset_); - TF_RETURN_IF_ERROR(underlying_.SkipNBytes(offset - offset_)); offset_ = offset; return Status::OK(); } diff --git a/tensorflow/core/lib/io/recordio_test.cc b/tensorflow/core/lib/io/recordio_test.cc index 63235761d9..da514bd21c 100644 --- a/tensorflow/core/lib/io/recordio_test.cc +++ b/tensorflow/core/lib/io/recordio_test.cc @@ -26,10 +26,11 @@ limitations under the License. namespace tensorflow { namespace io { +namespace { // Construct a string of the specified length made out of the supplied // partial string. 
-static string BigString(const string& partial_string, size_t n) { +string BigString(const string& partial_string, size_t n) { string result; while (result.size() < n) { result.append(partial_string); @@ -39,62 +40,66 @@ static string BigString(const string& partial_string, size_t n) { } // Construct a string from a number -static string NumberString(int n) { +string NumberString(int n) { char buf[50]; snprintf(buf, sizeof(buf), "%d.", n); return string(buf); } // Return a skewed potentially long string -static string RandomSkewedString(int i, random::SimplePhilox* rnd) { +string RandomSkewedString(int i, random::SimplePhilox* rnd) { return BigString(NumberString(i), rnd->Skewed(17)); } -class RecordioTest : public ::testing::Test { +class StringDest : public WritableFile { + public: + explicit StringDest(string* contents) : contents_(contents) {} + + Status Close() override { return Status::OK(); } + Status Flush() override { return Status::OK(); } + Status Sync() override { return Status::OK(); } + Status Append(const StringPiece& slice) override { + contents_->append(slice.data(), slice.size()); + return Status::OK(); + } + private: - class StringDest : public WritableFile { - public: - string contents_; - - Status Close() override { return Status::OK(); } - Status Flush() override { return Status::OK(); } - Status Sync() override { return Status::OK(); } - Status Append(const StringPiece& slice) override { - contents_.append(slice.data(), slice.size()); - return Status::OK(); + string* contents_; +}; + +class StringSource : public RandomAccessFile { + public: + explicit StringSource(string* contents) + : contents_(contents), force_error_(false) {} + + Status Read(uint64 offset, size_t n, StringPiece* result, + char* scratch) const override { + if (force_error_) { + force_error_ = false; + return errors::DataLoss("read error"); } - }; - - class StringSource : public RandomAccessFile { - public: - StringPiece contents_; - mutable bool force_error_; - mutable bool 
returned_partial_; - StringSource() : force_error_(false), returned_partial_(false) {} - - Status Read(uint64 offset, size_t n, StringPiece* result, - char* scratch) const override { - EXPECT_FALSE(returned_partial_) << "must not Read() after eof/error"; - - if (force_error_) { - force_error_ = false; - returned_partial_ = true; - return errors::DataLoss("read error"); - } - - if (offset >= contents_.size()) { - return errors::OutOfRange("end of file"); - } - - if (contents_.size() < offset + n) { - n = contents_.size() - offset; - returned_partial_ = true; - } - *result = StringPiece(contents_.data() + offset, n); - return Status::OK(); + + if (offset >= contents_->size()) { + return errors::OutOfRange("end of file"); + } + + if (contents_->size() < offset + n) { + n = contents_->size() - offset; } - }; + *result = StringPiece(contents_->data() + offset, n); + return Status::OK(); + } + + void force_error() { force_error_ = true; } + + private: + string* contents_; + mutable bool force_error_; +}; +class RecordioTest : public ::testing::Test { + private: + string contents_; StringDest dest_; StringSource source_; bool reading_; @@ -104,7 +109,9 @@ class RecordioTest : public ::testing::Test { public: RecordioTest() - : reading_(false), + : dest_(&contents_), + source_(&contents_), + reading_(false), readpos_(0), writer_(new RecordWriter(&dest_)), reader_(new RecordReader(&source_)) {} @@ -119,12 +126,11 @@ class RecordioTest : public ::testing::Test { TF_ASSERT_OK(writer_->WriteRecord(StringPiece(msg))); } - size_t WrittenBytes() const { return dest_.contents_.size(); } + size_t WrittenBytes() const { return contents_.size(); } string Read() { if (!reading_) { reading_ = true; - source_.contents_ = StringPiece(dest_.contents_); } string record; Status s = reader_->ReadRecord(&readpos_, &record); @@ -137,26 +143,20 @@ class RecordioTest : public ::testing::Test { } } - void IncrementByte(int offset, int delta) { - dest_.contents_[offset] += delta; - } + void 
IncrementByte(int offset, int delta) { contents_[offset] += delta; } - void SetByte(int offset, char new_byte) { - dest_.contents_[offset] = new_byte; - } + void SetByte(int offset, char new_byte) { contents_[offset] = new_byte; } - void ShrinkSize(int bytes) { - dest_.contents_.resize(dest_.contents_.size() - bytes); - } + void ShrinkSize(int bytes) { contents_.resize(contents_.size() - bytes); } void FixChecksum(int header_offset, int len) { // Compute crc of type/len/data - uint32_t crc = crc32c::Value(&dest_.contents_[header_offset + 6], 1 + len); + uint32_t crc = crc32c::Value(&contents_[header_offset + 6], 1 + len); crc = crc32c::Mask(crc); - core::EncodeFixed32(&dest_.contents_[header_offset], crc); + core::EncodeFixed32(&contents_[header_offset], crc); } - void ForceError() { source_.force_error_ = true; } + void ForceError() { source_.force_error(); } void StartReadingAt(uint64_t initial_offset) { readpos_ = initial_offset; } @@ -165,7 +165,6 @@ class RecordioTest : public ::testing::Test { Write("bar"); Write(BigString("x", 10000)); reading_ = true; - source_.contents_ = StringPiece(dest_.contents_); uint64 offset = WrittenBytes() + offset_past_end; string record; Status s = reader_->ReadRecord(&offset, &record); @@ -217,16 +216,100 @@ TEST_F(RecordioTest, RandomRead) { ASSERT_EQ("EOF", Read()); } +void TestNonSequentialReads(const RecordWriterOptions& writer_options, + const RecordReaderOptions& reader_options) { + string contents; + StringDest dst(&contents); + RecordWriter writer(&dst, writer_options); + for (int i = 0; i < 10; ++i) { + TF_ASSERT_OK(writer.WriteRecord(NumberString(i))) << i; + } + TF_ASSERT_OK(writer.Close()); + + StringSource file(&contents); + RecordReader reader(&file, reader_options); + + string record; + // First read sequentially to fill in the offsets table. 
+ uint64 offsets[10] = {0}; + uint64 offset = 0; + for (int i = 0; i < 10; ++i) { + offsets[i] = offset; + TF_ASSERT_OK(reader.ReadRecord(&offset, &record)) << i; + } + + // Read randomly: First go back to record #3 then forward to #8. + offset = offsets[3]; + TF_ASSERT_OK(reader.ReadRecord(&offset, &record)); + EXPECT_EQ("3.", record); + EXPECT_EQ(offsets[4], offset); + + offset = offsets[8]; + TF_ASSERT_OK(reader.ReadRecord(&offset, &record)); + EXPECT_EQ("8.", record); + EXPECT_EQ(offsets[9], offset); +} + +TEST_F(RecordioTest, NonSequentialReads) { + TestNonSequentialReads(RecordWriterOptions(), RecordReaderOptions()); +} + +TEST_F(RecordioTest, NonSequentialReadsWithReadBuffer) { + RecordReaderOptions options; + options.buffer_size = 1 << 10; + TestNonSequentialReads(RecordWriterOptions(), options); +} + +TEST_F(RecordioTest, NonSequentialReadsWithCompression) { + TestNonSequentialReads( + RecordWriterOptions::CreateRecordWriterOptions("ZLIB"), + RecordReaderOptions::CreateRecordReaderOptions("ZLIB")); +} + // Tests of all the error paths in log_reader.cc follow: -static void AssertHasSubstr(StringPiece s, StringPiece expected) { +void AssertHasSubstr(StringPiece s, StringPiece expected) { EXPECT_TRUE(str_util::StrContains(s, expected)) << s << " does not contain " << expected; } +void TestReadError(const RecordWriterOptions& writer_options, + const RecordReaderOptions& reader_options) { + const string wrote = BigString("well hello there!", 100); + string contents; + StringDest dst(&contents); + TF_ASSERT_OK(RecordWriter(&dst, writer_options).WriteRecord(wrote)); + + StringSource file(&contents); + RecordReader reader(&file, reader_options); + + uint64 offset = 0; + string read; + file.force_error(); + Status status = reader.ReadRecord(&offset, &read); + ASSERT_TRUE(errors::IsDataLoss(status)); + ASSERT_EQ(0, offset); + + // A failed Read() shouldn't update the offset, and thus a retry shouldn't + // lose the record. 
+ status = reader.ReadRecord(&offset, &read); + ASSERT_TRUE(status.ok()) << status; + EXPECT_GT(offset, 0); + EXPECT_EQ(wrote, read); +} + TEST_F(RecordioTest, ReadError) { - Write("foo"); - ForceError(); - AssertHasSubstr(Read(), "Data loss"); + TestReadError(RecordWriterOptions(), RecordReaderOptions()); +} + +TEST_F(RecordioTest, ReadErrorWithBuffering) { + RecordReaderOptions options; + options.buffer_size = 1 << 20; + TestReadError(RecordWriterOptions(), options); +} + +TEST_F(RecordioTest, ReadErrorWithCompression) { + TestReadError(RecordWriterOptions::CreateRecordWriterOptions("ZLIB"), + RecordReaderOptions::CreateRecordReaderOptions("ZLIB")); } TEST_F(RecordioTest, CorruptLength) { @@ -257,5 +340,6 @@ TEST_F(RecordioTest, ReadEnd) { CheckOffsetPastEndReturnsNoRecords(0); } TEST_F(RecordioTest, ReadPastEnd) { CheckOffsetPastEndReturnsNoRecords(5); } +} // namespace } // namespace io } // namespace tensorflow diff --git a/tensorflow/core/lib/io/zlib_inputstream.cc b/tensorflow/core/lib/io/zlib_inputstream.cc index 984fbc2810..47de36bf6c 100644 --- a/tensorflow/core/lib/io/zlib_inputstream.cc +++ b/tensorflow/core/lib/io/zlib_inputstream.cc @@ -25,8 +25,9 @@ ZlibInputStream::ZlibInputStream( InputStreamInterface* input_stream, size_t input_buffer_bytes, // size of z_stream.next_in buffer size_t output_buffer_bytes, // size of z_stream.next_out buffer - const ZlibCompressionOptions& zlib_options) - : input_stream_(input_stream), + const ZlibCompressionOptions& zlib_options, bool owns_input_stream) + : owns_input_stream_(owns_input_stream), + input_stream_(input_stream), input_buffer_capacity_(input_buffer_bytes), output_buffer_capacity_(output_buffer_bytes), z_stream_input_(new Bytef[input_buffer_capacity_]), @@ -37,14 +38,25 @@ ZlibInputStream::ZlibInputStream( InitZlibBuffer(); } +ZlibInputStream::ZlibInputStream(InputStreamInterface* input_stream, + size_t input_buffer_bytes, + size_t output_buffer_bytes, + const ZlibCompressionOptions& zlib_options) + : 
ZlibInputStream(input_stream, input_buffer_bytes, output_buffer_bytes, + zlib_options, false) {} + ZlibInputStream::~ZlibInputStream() { if (z_stream_) { inflateEnd(z_stream_.get()); } + if (owns_input_stream_) { + delete input_stream_; + } } Status ZlibInputStream::Reset() { TF_RETURN_IF_ERROR(input_stream_->Reset()); + inflateEnd(z_stream_.get()); InitZlibBuffer(); bytes_read_ = 0; return Status::OK(); diff --git a/tensorflow/core/lib/io/zlib_inputstream.h b/tensorflow/core/lib/io/zlib_inputstream.h index 9c7e14441c..37339163ee 100644 --- a/tensorflow/core/lib/io/zlib_inputstream.h +++ b/tensorflow/core/lib/io/zlib_inputstream.h @@ -40,7 +40,15 @@ class ZlibInputStream : public InputStreamInterface { // Create a ZlibInputStream for `input_stream` with a buffer of size // `input_buffer_bytes` bytes for reading contents from `input_stream` and // another buffer with size `output_buffer_bytes` for caching decompressed - // contents. Does *not* take ownership of "input_stream". + // contents. + // + // Takes ownership of `input_stream` iff `owns_input_stream` is true. + ZlibInputStream(InputStreamInterface* input_stream, size_t input_buffer_bytes, + size_t output_buffer_bytes, + const ZlibCompressionOptions& zlib_options, + bool owns_input_stream); + + // Equivalent to the previous constructor with owns_input_stream=false. 
ZlibInputStream(InputStreamInterface* input_stream, size_t input_buffer_bytes, size_t output_buffer_bytes, const ZlibCompressionOptions& zlib_options); @@ -65,10 +73,11 @@ class ZlibInputStream : public InputStreamInterface { private: void InitZlibBuffer(); - InputStreamInterface* input_stream_; // Not owned - size_t input_buffer_capacity_; // Size of z_stream_input_ - size_t output_buffer_capacity_; // Size of z_stream_output_ - char* next_unread_byte_; // Next unread byte in z_stream_output_ + const bool owns_input_stream_; + InputStreamInterface* input_stream_; + size_t input_buffer_capacity_; // Size of z_stream_input_ + size_t output_buffer_capacity_; // Size of z_stream_output_ + char* next_unread_byte_; // Next unread byte in z_stream_output_ // Buffer for storing contents read from compressed stream. // TODO(srbs): Consider using circular buffers. That would greatly simplify -- GitLab From d2fd0bbac6368a6b41e73d18c93b24442f5653f1 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Thu, 19 Apr 2018 23:35:04 -0700 Subject: [PATCH 057/434] [TF:XLA] Factor out the handling of while instructions to make HloVerifier::Run shorter. 
PiperOrigin-RevId: 193626864 --- .../compiler/xla/service/hlo_verifier.cc | 83 +++++++++++-------- .../compiler/xla/service/hlo_verifier.h | 8 +- 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 8c875698eb..80ed6d6832 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -731,6 +731,55 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const { return tensorflow::Status::OK(); } +Status HloVerifier::CheckWhileInstruction(HloInstruction* instruction) { + auto* while_cond = instruction->while_condition(); + auto* while_body = instruction->while_body(); + if (while_cond->num_parameters() != 1) { + return FailedPrecondition( + "While condition must have exactly 1 parameter; had %lld : %s", + while_cond->num_parameters(), while_cond->ToString().c_str()); + } + if (while_body->num_parameters() != 1) { + return FailedPrecondition( + "While body must have exactly 1 parameter; had %lld : %s", + while_body->num_parameters(), while_body->ToString().c_str()); + } + if (instruction->operand_count() != 1) { + return FailedPrecondition( + "While loop must have exactly one operand; had %lld : %s", + instruction->operand_count(), instruction->ToString().c_str()); + } + auto* init = instruction->operand(0); + auto* cond_param = while_cond->parameter_instruction(0); + if (!ShapeUtil::Compatible(init->shape(), cond_param->shape())) { + return FailedPrecondition( + "While condition's parameter must have the same shape as the " + "loop's 'init'. 
init: %s, param: %s", + init->ToString().c_str(), cond_param->ToString().c_str()); + } + auto* cond_root = while_cond->root_instruction(); + if (!ShapeUtil::Compatible(cond_root->shape(), + ShapeUtil::MakeShape(PRED, {}))) { + return FailedPrecondition("While condition should have shape PRED: %s", + cond_root->ToString().c_str()); + } + auto* body_param = while_body->parameter_instruction(0); + if (!ShapeUtil::Compatible(init->shape(), body_param->shape())) { + return FailedPrecondition( + "While body's parameter must have the same shape as the loop's" + " 'init'. init: %s, param: %s", + init->ToString().c_str(), body_param->ToString().c_str()); + } + auto* body_root = while_body->root_instruction(); + if (!ShapeUtil::Compatible(init->shape(), body_root->shape())) { + return FailedPrecondition( + "While body should have same shape as the loop's 'init'." + "init: %s, body: %s", + init->ToString().c_str(), body_root->ToString().c_str()); + } + return tensorflow::Status::OK(); +} + StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(VerifyHloStructure(module)); @@ -771,39 +820,7 @@ StatusOr HloVerifier::Run(HloModule* module) { << instruction->dimensions().size() << " != " << ShapeUtil::Rank(instruction->operand(0)->shape()); } else if (instruction->opcode() == HloOpcode::kWhile) { - auto* while_cond = instruction->while_condition(); - auto* while_body = instruction->while_body(); - TF_RET_CHECK(while_cond->num_parameters() == 1) - << "While condition must have exactly 1 parameter; had " - << while_cond->num_parameters() << ": " << while_cond->ToString(); - TF_RET_CHECK(while_body->num_parameters() == 1) - << "While body must have exactly 1 parameter; had " - << while_body->num_parameters() << ": " << while_body->ToString(); - TF_RET_CHECK(instruction->operand_count() == 1) - << "While loop must have exactly one operand; had " - << instruction->operand_count() << ": " << instruction->ToString(); - - auto* init = instruction->operand(0); - auto* 
cond_param = while_cond->parameter_instruction(0); - TF_RET_CHECK(ShapeUtil::Compatible(init->shape(), cond_param->shape())) - << "While condition's parameter must have the same shape as the " - "loop's 'init'. init: " - << init->ToString() << ", param: " << cond_param->ToString(); - auto* cond_root = while_cond->root_instruction(); - TF_RET_CHECK(ShapeUtil::Compatible(cond_root->shape(), - ShapeUtil::MakeShape(PRED, {}))) - << "While condition should have shape PRED: " - << cond_root->ToString(); - - auto* body_param = while_body->parameter_instruction(0); - TF_RET_CHECK(ShapeUtil::Compatible(init->shape(), body_param->shape())) - << "While body's parameter must have the same shape as the loop's " - "'init'. init: " - << init->ToString() << ", param: " << body_param->ToString(); - auto* body_root = while_body->root_instruction(); - TF_RET_CHECK(ShapeUtil::Compatible(init->shape(), body_root->shape())) - << "While body should have same shape as the loop's 'init'. init: " - << init->ToString() << ", body: " << body_root->ToString(); + TF_RETURN_IF_ERROR(CheckWhileInstruction(instruction)); } auto previous = instructions.find(instruction->name()); diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h index 1dd7ec3c51..1ec55a9bdc 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.h +++ b/tensorflow/compiler/xla/service/hlo_verifier.h @@ -102,7 +102,7 @@ class ShapeVerifier : public DfsHloVisitor { Status CheckTernaryShape(const HloInstruction* instruction); Status CheckVariadicShape(const HloInstruction* instruction); - // Checks if the given two instructions shares the same channel id. + // Checks if the given two instructions share the same channel id. Status CheckSameChannel(const HloInstruction* instr1, const HloInstruction* instr2); @@ -144,9 +144,11 @@ class HloVerifier : public HloPassInterface { // CHECKs various invariants of a fusion instruction. 
Status CheckFusionInstruction(HloInstruction* fusion) const; + Status CheckWhileInstruction(HloInstruction* instruction); + // Creates a ShapeVerifier that checks that shapes match inferred - // expectations. This is a factory function because ShapeVerifier, Note that - // ShapeVerifier, being a DfsHloVisitor, is stateful. We want a clean object + // expectations. This is a factory function because ShapeVerifier, + // being a DfsHloVisitor, is stateful. We want a clean object // for each run of the verifier. ShapeVerifierFactory shape_verifier_factory_; }; -- GitLab From 4e9dae45b3017f13eb68603294c6c28a63656050 Mon Sep 17 00:00:00 2001 From: Koan-Sin Tan Date: Fri, 20 Apr 2018 15:35:42 +0800 Subject: [PATCH 058/434] change ms to us and make timestamp uint64 1. microsecond usually is denoted as us; ms is millisecond 2. make timestamp uint64 all the way --- tensorflow/contrib/lite/profiling/profile_buffer.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/lite/profiling/profile_buffer.h b/tensorflow/contrib/lite/profiling/profile_buffer.h index 3bfe02571b..299b2a9cad 100644 --- a/tensorflow/contrib/lite/profiling/profile_buffer.h +++ b/tensorflow/contrib/lite/profiling/profile_buffer.h @@ -37,9 +37,9 @@ struct ProfileEvent { // Label of the event. This usually describes the event. const char* tag; // Timestamp in microseconds when the event began. - int64_t begin_timestamp_ms; + uint64_t begin_timestamp_us; // Timestamp in microseconds when the event ended. - int64_t end_timestamp_ms; + uint64_t end_timestamp_us; // The field containing the type of event. This must be one of the event types // in EventType. 
EventType event_type; @@ -74,13 +74,13 @@ class ProfileBuffer { if (!enabled_) { return kInvalidEventHandle; } - int64_t timestamp = NowMicros(); + uint64_t timestamp = NowMicros(); int index = current_index_ % event_buffer_.size(); event_buffer_[index].tag = tag; event_buffer_[index].event_type = event_type; event_buffer_[index].event_metadata = event_metadata; - event_buffer_[index].begin_timestamp_ms = timestamp; - event_buffer_[index].end_timestamp_ms = 0; + event_buffer_[index].begin_timestamp_us = timestamp; + event_buffer_[index].end_timestamp_us = 0; current_index_++; return index; } @@ -103,7 +103,7 @@ class ProfileBuffer { } int event_index = event_handle % max_size; - event_buffer_[event_index].end_timestamp_ms = NowMicros(); + event_buffer_[event_index].end_timestamp_us = NowMicros(); } // Returns the size of the buffer. @@ -134,7 +134,7 @@ class ProfileBuffer { } private: - static int64_t NowMicros() { + static uint64_t NowMicros() { // TODO(shashishekhar): Refactor this to a separate file. 
struct timeval tv; gettimeofday(&tv, nullptr); -- GitLab From d3b91ba5696e998ea9155a91f58b6b6ba2afd340 Mon Sep 17 00:00:00 2001 From: Koan-Sin Tan Date: Fri, 20 Apr 2018 17:05:22 +0800 Subject: [PATCH 059/434] add profiling mechanism build with something like: ``` bazel build --config android_arm64 \ --cxxopt=-std=c++11 \ --cxxopt=-DTFLITE_PROFILING_ENABLED \ //tensorflow/contrib/lite/examples/label_image:label_image ``` run `label_image` will get something like: ``` ./label_image -p 1 Loaded model ./mobilenet_quant_v1_224.tflite resolved reporter invoked average time: 67.227 ms 13.349, Node 0, OpCode 3, CONV_2D 6.024, Node 1, OpCode 4, DEPTHWISE_CONV_2D 11.847, Node 2, OpCode 3, CONV_2D 3.927, Node 3, OpCode 4, DEPTHWISE_CONV_2D 1.905, Node 4, OpCode 3, CONV_2D 3.573, Node 5, OpCode 4, DEPTHWISE_CONV_2D 2.344, Node 6, OpCode 3, CONV_2D 0.964, Node 7, OpCode 4, DEPTHWISE_CONV_2D 1.224, Node 8, OpCode 3, CONV_2D 1.846, Node 9, OpCode 4, DEPTHWISE_CONV_2D 2.181, Node 10, OpCode 3, CONV_2D 0.454, Node 11, OpCode 4, DEPTHWISE_CONV_2D 0.997, Node 12, OpCode 3, CONV_2D 0.865, Node 13, OpCode 4, DEPTHWISE_CONV_2D 1.844, Node 14, OpCode 3, CONV_2D 0.753, Node 15, OpCode 4, DEPTHWISE_CONV_2D 1.724, Node 16, OpCode 3, CONV_2D 0.803, Node 17, OpCode 4, DEPTHWISE_CONV_2D 1.698, Node 18, OpCode 3, CONV_2D 0.794, Node 19, OpCode 4, DEPTHWISE_CONV_2D 1.754, Node 20, OpCode 3, CONV_2D 0.798, Node 21, OpCode 4, DEPTHWISE_CONV_2D 1.704, Node 22, OpCode 3, CONV_2D 0.204, Node 23, OpCode 4, DEPTHWISE_CONV_2D 0.983, Node 24, OpCode 3, CONV_2D 0.373, Node 25, OpCode 4, DEPTHWISE_CONV_2D 1.791, Node 26, OpCode 3, CONV_2D 0.067, Node 27, OpCode 1, AVERAGE_POOL_2D 0.388, Node 28, OpCode 3, CONV_2D 0.001, Node 29, OpCode 22, RESHAPE 0.035, Node 30, OpCode 25, SOFTMAX 0.600: 458 bow tie 0.365: 653 military uniform 0.008: 835 suit 0.008: 611 jersey 0.004: 514 cornet ``` --- .../lite/examples/label_image/label_image.cc | 47 +++++++++++++++++-- .../lite/examples/label_image/label_image.h | 1 + 
2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.cc b/tensorflow/contrib/lite/examples/label_image/label_image.cc index a91467d345..71d24a7ea5 100644 --- a/tensorflow/contrib/lite/examples/label_image/label_image.cc +++ b/tensorflow/contrib/lite/examples/label_image/label_image.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include #include +#include #include #include #include @@ -70,6 +71,23 @@ TfLiteStatus ReadLabelsFile(const string& file_name, return kTfLiteOk; } +void PrintProfilingInfo(const profiling::ProfileEvent* e, uint32_t op_index, + TfLiteRegistration registration) { + // output something like + // time (ms) , Node xxx, OpCode xxx, symbolic name + // 5.352, Node 5, OpCode 4, DEPTHWISE_CONV_2D + + + LOG(INFO) << std::fixed << std::setw(10) << std::setprecision(3) + << (e->end_timestamp_us - e->begin_timestamp_us) / 1000.0 + << ", Node " << std::setw(3) << std::setprecision(3) << op_index + << ", OpCode " << std::setw(3) << std::setprecision(3) + << registration.builtin_code << ", " + << EnumNameBuiltinOperator( + (BuiltinOperator)registration.builtin_code) + << "\n"; +} + void RunInference(Settings* s) { if (!s->model_name.c_str()) { LOG(ERROR) << "no model file name\n"; @@ -89,7 +107,7 @@ void RunInference(Settings* s) { tflite::ops::builtin::BuiltinOpResolver resolver; - tflite::InterpreterBuilder(*model, resolver)(&interpreter); + tflite::InterpreterBuilder (*model, resolver)(&interpreter); if (!interpreter) { LOG(FATAL) << "Failed to construct interpreter\n"; exit(-1); @@ -166,6 +184,11 @@ void RunInference(Settings* s) { exit(-1); } + profiling::Profiler* profiler = new profiling::Profiler(); + interpreter->SetProfiler(profiler); + + if (s->profiling) profiler->StartProfiling(); + struct timeval start_time, stop_time; gettimeofday(&start_time, NULL); for (int i = 0; i < s->loop_count; i++) { @@ -179,6 +202,18 @@ void RunInference(Settings* s) { <<
(get_us(stop_time) - get_us(start_time)) / (s->loop_count * 1000) << " ms \n"; + if (s->profiling) { + profiler->StopProfiling(); + auto profile_events = profiler->GetProfileEvents(); + for (int i = 0; i < profile_events.size(); i++) { + auto op_index = profile_events[i]->event_metadata; + const auto node_and_registration = + interpreter->node_and_registration(op_index); + const TfLiteRegistration registration = node_and_registration->second; + PrintProfilingInfo(profile_events[i], op_index, registration); + } + } + const int output_size = 1000; const size_t num_results = 5; const float threshold = 0.001f; @@ -217,13 +252,14 @@ void RunInference(Settings* s) { void display_usage() { LOG(INFO) << "label_image\n" - << "--accelerated, -a: [0|1], use Android NNAPI or note\n" + << "--accelerated, -a: [0|1], use Android NNAPI or not\n" << "--count, -c: loop interpreter->Invoke() for certain times\n" << "--input_mean, -b: input mean\n" << "--input_std, -s: input standard deviation\n" << "--image, -i: image_name.bmp\n" << "--labels, -l: labels for the model\n" << "--tflite_model, -m: model_name.tflite\n" + << "--profiling, -p: [0|1], profiling or not\n" << "--threads, -t: number of threads\n" << "--verbose, -v: [0|1] print more information\n" << "\n"; @@ -241,6 +277,7 @@ int Main(int argc, char** argv) { {"image", required_argument, 0, 'i'}, {"labels", required_argument, 0, 'l'}, {"tflite_model", required_argument, 0, 'm'}, + {"profiling", required_argument, 0, 'p'}, {"threads", required_argument, 0, 't'}, {"input_mean", required_argument, 0, 'b'}, {"input_std", required_argument, 0, 's'}, @@ -249,7 +286,7 @@ int Main(int argc, char** argv) { /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:b:c:f:i:l:m:s:t:v:", long_options, + c = getopt_long(argc, argv, "a:b:c:f:i:l:m:p:s:t:v:", long_options, &option_index); /* Detect the end of the options. 
*/ @@ -276,6 +313,10 @@ int Main(int argc, char** argv) { case 'm': s.model_name = optarg; break; + case 'p': + s.profiling = strtol( // NOLINT(runtime/deprecated_fn) + optarg, (char**)NULL, 10); + break; case 's': s.input_std = strtod(optarg, NULL); break; diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.h b/tensorflow/contrib/lite/examples/label_image/label_image.h index 4de32e33fb..4b48014e1c 100644 --- a/tensorflow/contrib/lite/examples/label_image/label_image.h +++ b/tensorflow/contrib/lite/examples/label_image/label_image.h @@ -25,6 +25,7 @@ struct Settings { bool verbose = false; bool accel = false; bool input_floating = false; + bool profiling = false; int loop_count = 1; float input_mean = 127.5f; float input_std = 127.5f; -- GitLab From 9e0037513040fd09ee01442bd062936b41bee40c Mon Sep 17 00:00:00 2001 From: SukHwan Kim <30820468+jerry4897@users.noreply.github.com> Date: Fri, 20 Apr 2018 18:24:52 +0900 Subject: [PATCH 060/434] Update c_api_test.cc Typo --- tensorflow/c/c_api_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc index ca80db23ed..9b86425aa5 100644 --- a/tensorflow/c/c_api_test.cc +++ b/tensorflow/c/c_api_test.cc @@ -1700,7 +1700,7 @@ TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_NoGradInputs) { TestGradientsError(false); } -// REGISTER_OP for CApiTestAttributesTest test cases. +// REGISTER_OP for CApiAttributesTest test cases. // Registers two ops, each with a single attribute called 'v'. // The attribute in one op will have a type 'type', the other // will have list(type). -- GitLab From 1ad32703d4e728d8fba835aaf24418f19cf85dbe Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Fri, 20 Apr 2018 03:29:31 -0700 Subject: [PATCH 061/434] [TF:XLA] Implement ClipByValue. 
PiperOrigin-RevId: 193646890 --- tensorflow/compiler/tests/ternary_ops_test.py | 18 ++++++ tensorflow/compiler/tf2xla/kernels/BUILD | 1 + .../tf2xla/kernels/clip_by_value_op.cc | 61 +++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc diff --git a/tensorflow/compiler/tests/ternary_ops_test.py b/tensorflow/compiler/tests/ternary_ops_test.py index ba5f829936..75a2cf07c5 100644 --- a/tensorflow/compiler/tests/ternary_ops_test.py +++ b/tensorflow/compiler/tests/ternary_ops_test.py @@ -23,6 +23,7 @@ import numpy as np from tensorflow.compiler.tests.xla_test import XLATestCase from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest @@ -119,6 +120,23 @@ class TernaryOpsTest(XLATestCase): np.array([2, 1], dtype=np.int32), expected=np.array([[2], [5]], dtype=dtype)) + def testClipByValue(self): + # TODO(b/78258593): enable integer types here too. 
+ for dtype in self.float_types: + test_cases = [ + (np.array([2, 4, 5], dtype=dtype), dtype(7)), # + (dtype(1), np.array([2, 4, 5], dtype=dtype)), # + (np.array([-2, 7, 7], dtype=dtype), np.array([-2, 9, 8], dtype=dtype)) + ] + x = np.array([-2, 10, 6], dtype=dtype) + for lower, upper in test_cases: + self._testTernary( + gen_math_ops._clip_by_value, + x, + lower, + upper, + expected=np.minimum(np.maximum(x, lower), upper)) + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 579b669699..00fd08b1a0 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -21,6 +21,7 @@ tf_kernel_library( "cast_op.cc", "categorical_op.cc", "cholesky_op.cc", + "clip_by_value_op.cc", "concat_op.cc", "const_op.cc", "conv_ops.cc", diff --git a/tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc b/tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc new file mode 100644 index 0000000000..fdf75be7b1 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc @@ -0,0 +1,61 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/core/framework/tensor_shape.h" + +namespace tensorflow { +namespace { + +class ClipByValueOp : public XlaOpKernel { + public: + explicit ClipByValueOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + + void Compile(XlaOpKernelContext* ctx) override { + const TensorShape shape = ctx->InputShape(0); + const TensorShape min_shape = ctx->InputShape(1); + const TensorShape max_shape = ctx->InputShape(2); + + xla::ComputationBuilder* builder = ctx->builder(); + auto input = ctx->Input(0); + auto min = ctx->Input(1); + auto max = ctx->Input(2); + + auto shape_error = [&]() -> tensorflow::Status { + return errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. ", + "Input shape: ", shape.DebugString(), + " clip_value_min shape: ", min_shape.DebugString(), + " clip_value_max shape: ", max_shape.DebugString()); + }; + + if (shape != min_shape) { + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(min_shape), shape_error()); + min = builder->Broadcast(min, shape.dim_sizes()); + } + if (shape != max_shape) { + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(max_shape), shape_error()); + max = builder->Broadcast(max, shape.dim_sizes()); + } + ctx->SetOutput(0, builder->Clamp(min, input, max)); + } +}; + +REGISTER_XLA_OP(Name("ClipByValue"), ClipByValueOp); + +} // namespace +} // namespace tensorflow -- GitLab From f0df6701d01954073e912f24f7c983de4f091a1e Mon Sep 17 00:00:00 2001 From: joel-shor Date: Fri, 20 Apr 2018 14:01:02 +0300 Subject: [PATCH 062/434] [tf.data] Check in a strictly faster rejection resampling transformation. This transformation is faster because it rejects fewer data. This is done by occasionally sampling from the original data distribution in an efficient way. 
Tested: bazel test :resample_test --- .../data/python/kernel_tests/resample_test.py | 128 +++++++-- .../contrib/data/python/ops/resampling.py | 267 ++++++++++++++---- 2 files changed, 327 insertions(+), 68 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 5f47dcb339..9e1273eba1 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -18,6 +18,8 @@ from __future__ import division from __future__ import print_function import numpy as np +import time +from absl.testing import parameterized from tensorflow.contrib.data.python.ops import resampling from tensorflow.python.data.ops import dataset_ops @@ -30,47 +32,70 @@ from tensorflow.python.platform import test from tensorflow.python.util import compat -class ResampleTest(test.TestCase): +def _time_resampling( + test_obj, data_np, target_dist, init_dist, use_v2, num_to_sample): + dataset = dataset_ops.Dataset.from_tensor_slices(data_np).repeat() - def testInitialKnownDistribution(self): - self._testDistribution(initial_known=True) + # Reshape distribution via rejection sampling. 
+ apply_fn = (resampling.rejection_resample_v2 if use_v2 else + resampling.rejection_resample) + dataset = dataset.apply( + apply_fn( + class_func=lambda x: x, + target_dist=target_dist, + initial_dist=init_dist, + seed=142)) - def testInitialNotKnownDistribution(self): - self._testDistribution(initial_known=False) + get_next = dataset.make_one_shot_iterator().get_next() - def _testDistribution(self, initial_known): + with test_obj.test_session() as sess: + start_time = time.time() + for _ in xrange(num_to_sample): + sess.run(get_next) + end_time = time.time() + + return end_time - start_time + + +class ResampleTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + ('InitialnDistributionKnown', True, False), + ('InitialDistributionUnknown', False, False), + ('InitialDistributionKnownV2', True, True), + ('InitialDistributionUnknownV2', False, True)) + def testDistribution(self, initial_known, use_v2): classes = np.random.randint(5, size=(20000,)) # Uniformly sampled target_dist = [0.9, 0.05, 0.05, 0.0, 0.0] initial_dist = [0.2] * 5 if initial_known else None - iterator = (dataset_ops.Dataset.from_tensor_slices(classes).shuffle( - 200, seed=21).map(lambda c: (c, string_ops.as_string(c))).apply( - resampling.rejection_resample( - target_dist=target_dist, - initial_dist=initial_dist, - class_func=lambda c, _: c, - seed=27)).make_one_shot_iterator()) - get_next = iterator.get_next() + dataset = dataset_ops.Dataset.from_tensor_slices(classes).shuffle( + 200, seed=21).map(lambda c: (c, string_ops.as_string(c))).repeat() + apply_fn = (resampling.rejection_resample_v2 if use_v2 else + resampling.rejection_resample) + get_next = dataset.apply( + apply_fn( + target_dist=target_dist, + initial_dist=initial_dist, + class_func=lambda c, _: c, + seed=27)).make_one_shot_iterator().get_next() with self.test_session() as sess: returned = [] - with self.assertRaises(errors.OutOfRangeError): - while True: - returned.append(sess.run(get_next)) + while 
len(returned) < 4000: + returned.append(sess.run(get_next)) returned_classes, returned_classes_and_data = zip(*returned) _, returned_data = zip(*returned_classes_and_data) self.assertAllEqual([compat.as_bytes(str(c)) for c in returned_classes], returned_data) total_returned = len(returned_classes) - # Subsampling rejects a large percentage of the initial data in - # this case. - self.assertGreater(total_returned, 20000 * 0.2) class_counts = np.array([ len([True for v in returned_classes if v == c]) for c in range(5)]) returned_dist = class_counts / total_returned self.assertAllClose(target_dist, returned_dist, atol=1e-2) + def testRandomClasses(self): init_dist = [0.25, 0.25, 0.25, 0.25] target_dist = [0.0, 0.0, 0.0, 1.0] @@ -109,5 +134,68 @@ class ResampleTest(test.TestCase): self.assertAllClose(target_dist, bincount, atol=1e-2) + @parameterized.named_parameters( + ('InitialnDistributionKnown', True, False), + ('InitialDistributionUnknown', False, False), + ('InitialDistributionKnownV2', True, True), + ('InitialDistributionUnknownV2', False, True)) + def _testNewResampleIsFaster(self, target_dist, num_to_sample): + init_dist = [0.25, 0.25, 0.25, 0.25] + num_classes = len(init_dist) + num_samples = 1000 + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + fast_time = _time_resampling(self, data_np, target_dist, init_dist, + use_v2=True, num_to_sample=num_to_sample) + slow_time = _time_resampling(self, data_np, target_dist, init_dist, + use_v2=False, num_to_sample=num_to_sample) + + self.assertLess(fast_time, slow_time) + + + def testNewResampleIsFasterSmallSkewManySamples(self): + self._testNewResampleIsFaster([0.1, 0.1, 0.1, 0.7], 1000) + + def testNewResampleIsFasterBigSkewManySamples(self): + self._testNewResampleIsFaster([0.01, 0.01, 0.01, 0.97], 1000) + + def testNewResampleIsFasterSmallSkewFewSamples(self): + self._testNewResampleIsFaster([0.1, 0.1, 0.1, 0.7], 100) + + def testNewResampleIsFasterBigSkewFewSamples(self): + 
self._testNewResampleIsFaster([0.01, 0.01, 0.01, 0.97], 100) + + +class MapDatasetBenchmark(test.Benchmark): + + def benchmarkResamplePerformance(self): + init_dist = [0.25, 0.25, 0.25, 0.25] + target_dist = [0.0, 0.0, 0.0, 1.0] + num_classes = len(init_dist) + # We don't need many samples to test a dirac-delta target distribution + num_samples = 1000 + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + resample_time = _time_resampling( + self, data_np, target_dist, init_dist, use_v2=False, num_to_sample=1000) + + self.report_benchmark( + iters=1000, wall_time=resample_time, name="benchmark_resample") + + def benchmarkResampleAndBatchPerformance(self): + init_dist = [0.25, 0.25, 0.25, 0.25] + target_dist = [0.0, 0.0, 0.0, 1.0] + num_classes = len(init_dist) + # We don't need many samples to test a dirac-delta target distribution + num_samples = 1000 + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + resample_time = _time_resampling( + self, data_np, target_dist, init_dist, use_v2=True, num_to_sample=1000) + + self.report_benchmark( + iters=1000, wall_time=resample_time, name="benchmark_resample_v2") + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index b465397437..94e28b9a2d 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -20,6 +20,7 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.contrib.data.python.ops import scan_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes @@ -50,14 +51,15 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): A `Dataset` transformation function, which can be passed to @{tf.data.Dataset.apply}. 
""" - def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" - dist_estimation_batch_size = 32 target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist") class_values_ds = dataset.map(class_func) + + # Get initial distribution. if initial_dist is not None: - initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist") + initial_dist_t = ops.convert_to_tensor( + initial_dist, name="initial_dist") acceptance_dist = _calculate_acceptance_probs(initial_dist_t, target_dist_t) initial_dist_ds = dataset_ops.Dataset.from_tensors( @@ -65,55 +67,181 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): acceptance_dist_ds = dataset_ops.Dataset.from_tensors( acceptance_dist).repeat() else: - num_classes = (target_dist_t.shape[0].value or - array_ops.shape(target_dist_t)[0]) - smoothing_constant = 10 - initial_examples_per_class_seen = array_ops.fill( - [num_classes], np.int64(smoothing_constant)) - - def update_estimate_and_tile(num_examples_per_class_seen, c): - updated_examples_per_class_seen, dist = _estimate_data_distribution( - c, num_examples_per_class_seen) - tiled_dist = array_ops.tile( - array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1]) - return updated_examples_per_class_seen, tiled_dist - - initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size) - .apply(scan_ops.scan(initial_examples_per_class_seen, - update_estimate_and_tile)) - .apply(batching.unbatch())) + initial_dist_ds = _estimate_initial_dist_ds( + target_dist_t, class_values_ds) acceptance_dist_ds = initial_dist_ds.map( lambda initial: _calculate_acceptance_probs(initial, target_dist_t)) + return _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, + class_values_ds, seed) + + return _apply_fn + - def maybe_warn_on_large_rejection(accept_dist, initial_dist): - proportion_rejected = math_ops.reduce_sum( - (1 - accept_dist) * initial_dist) - return control_flow_ops.cond( - 
math_ops.less(proportion_rejected, .5), - lambda: accept_dist, - lambda: logging_ops.Print( # pylint: disable=g-long-lambda - accept_dist, [proportion_rejected, initial_dist, accept_dist], - message="Proportion of examples rejected by sampler is high: ", - summarize=100, - first_n=10)) - - acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds, - initial_dist_ds)) - .map(maybe_warn_on_large_rejection)) - - def _gather_and_copy(class_val, acceptance_prob, data): - return (class_val, array_ops.gather(acceptance_prob, class_val), data) - current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( - (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) - filtered_ds = ( - current_probabilities_and_class_and_data_ds - .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) - return filtered_ds.map(lambda class_value, _, data: (class_value, data)) +def rejection_resample_v2(class_func, target_dist, initial_dist=None, + seed=None): + """A transformation that resamples a dataset to achieve a target distribution. + This differs from v1 in that it will also sample from the original dataset + with some probability, so it makes strictly fewer data rejections. This + transformation is faster than the original. + + **NOTE** Resampling is performed via rejection sampling; some fraction + of the input values will be dropped. + + Args: + class_func: A function mapping an element of the input dataset to a scalar + `tf.int32` tensor. Values should be in `[0, num_classes)`. + target_dist: A floating point type tensor, shaped `[num_classes]`. + initial_dist: (Optional.) A floating point type tensor, shaped + `[num_classes]`. If not provided, the true class distribution is + estimated live in a streaming fashion. + seed: (Optional.) Python integer seed for the resampler. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. 
+ """ + def _apply_fn(dataset): + """Function from `Dataset` to `Dataset` that applies the transformation.""" + target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist") + class_values_ds = dataset.map(class_func) + + # Get initial distribution. + if initial_dist is not None: + initial_dist_t = ops.convert_to_tensor( + initial_dist, name="initial_dist") + acceptance_dist, prob_of_original = ( + _calculate_acceptance_probs_with_mixing(initial_dist_t, + target_dist_t)) + initial_dist_ds = dataset_ops.Dataset.from_tensors( + initial_dist_t).repeat() + acceptance_dist_ds = dataset_ops.Dataset.from_tensors( + acceptance_dist).repeat() + prob_of_original_ds = dataset_ops.Dataset.from_tensors( + prob_of_original).repeat() + else: + initial_dist_ds = _estimate_initial_dist_ds( + target_dist_t, class_values_ds) + acceptance_and_original_prob_ds = initial_dist_ds.map( + lambda initial: _calculate_acceptance_probs_with_mixing( + initial, target_dist_t)) + acceptance_dist_ds = acceptance_and_original_prob_ds.map( + lambda accept_prob, _: accept_prob) + prob_of_original_ds = acceptance_and_original_prob_ds.map( + lambda _, prob_original: prob_original) + filtered_ds = _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, + class_values_ds, seed) + # Prefetch filtered dataset for speed. + filtered_ds = filtered_ds.prefetch(3) + + return interleave_ops.sample_from_datasets( + [dataset_ops.Dataset.zip((class_values_ds, dataset)), filtered_ds], + weights=prob_of_original_ds.map(lambda prob: [(prob, 1.0 - prob)]), + seed=seed) return _apply_fn +def _random_interleave_datasets(ds1, ds1_classes, ds2, prob_of_ds1, seed=None): + """Randomly interleave datasets. + + We carefully combine `ds1` and 'ds2' so that we don't needlessly compute the + filtering. + + Args: + ds1: A dataset to interleave. + ds1_classes: Dataset of class values associated with ds1. + ds2: Another dataset to interleave. + prob_of_ds1: A dataset of probabilities. 
Each probability represents the + likelihood of drawing from `ds1`. + seed: (Optional.) Python integer seed for the resampler. + + Returns: + A single dataset, combined from `ds1` and `ds2`. + """ + num_filtered_to_prefetch = 3 + ds2 = ds2.prefetch(num_filtered_to_prefetch) + filtered_iterator = ds2.make_one_shot_iterator() + combined_ds = dataset_ops.Dataset.zip( + (ds1_classes, ds1, prob_of_ds1)).map( + lambda ds1_class, original_data, prob_of_original: + control_flow_ops.cond( + random_ops.random_uniform([], seed=seed) < prob_of_original, + lambda: (ds1_class, original_data), + filtered_iterator.get_next)) + return combined_ds + + +def _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, class_values_ds, + seed): + """Filters a dataset based on per-class acceptance probabilities. + + Args: + dataset: The dataset to be filtered. + acceptance_dist_ds: A dataset of acceptance probabilities. + initial_dist_ds: A dataset of the initial probability distribution, given or + estimated. + class_values_ds: A dataset of the corresponding classes. + seed: (Optional.) Python integer seed for the resampler. + + Returns: + A dataset of (class value, data) after filtering. 
+ """ + def maybe_warn_on_large_rejection(accept_dist, initial_dist): + proportion_rejected = math_ops.reduce_sum((1 - accept_dist) * initial_dist) + return control_flow_ops.cond( + math_ops.less(proportion_rejected, .5), + lambda: accept_dist, + lambda: logging_ops.Print( # pylint: disable=g-long-lambda + accept_dist, [proportion_rejected, initial_dist, accept_dist], + message="Proportion of examples rejected by sampler is high: ", + summarize=100, + first_n=10)) + + acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds, + initial_dist_ds)) + .map(maybe_warn_on_large_rejection)) + + def _gather_and_copy(class_val, acceptance_prob, data): + return class_val, array_ops.gather(acceptance_prob, class_val), data + + current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( + (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) + filtered_ds = ( + current_probabilities_and_class_and_data_ds + .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) + return filtered_ds.map(lambda class_value, _, data: (class_value, data)) + + +def _estimate_initial_dist_ds( + target_dist_t, class_values_ds, dist_estimation_batch_size=32, + smoothing_constant=10): + num_classes = (target_dist_t.shape[0].value or + array_ops.shape(target_dist_t)[0]) + initial_examples_per_class_seen = array_ops.fill( + [num_classes], np.int64(smoothing_constant)) + + def update_estimate_and_tile(num_examples_per_class_seen, c): + updated_examples_per_class_seen, dist = _estimate_data_distribution( + c, num_examples_per_class_seen) + tiled_dist = array_ops.tile( + array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1]) + return updated_examples_per_class_seen, tiled_dist + + initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size) + .apply(scan_ops.scan(initial_examples_per_class_seen, + update_estimate_and_tile)) + .apply(batching.unbatch())) + + return initial_dist_ds + + +def _get_target_to_initial_ratio(initial_probs, 
target_probs): + # Add tiny to initial_probs to avoid divide by zero. + denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny) + return target_probs / denom + + def _calculate_acceptance_probs(initial_probs, target_probs): """Calculate the per-class acceptance rates. @@ -152,13 +280,10 @@ def _calculate_acceptance_probs(initial_probs, target_probs): 0 <= t_i <= 1, sum_i(t_i) = 1 ``` - A solution for a_i in terms of the other variables is the following: ```a_i = (t_i / p_i) / max_i[t_i / p_i]``` """ - # Add tiny to initial_probs to avoid divide by zero. - denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny) - ratio_l = target_probs / denom + ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs) # Calculate list of acceptance probabilities. max_ratio = math_ops.reduce_max(ratio_l) @@ -188,3 +313,49 @@ def _estimate_data_distribution(c, num_examples_per_class_seen): math_ops.reduce_sum(num_examples_per_class_seen)) dist = math_ops.cast(init_prob_estimate, dtypes.float32) return num_examples_per_class_seen, dist + + +def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs): + """Calculates the acceptance probabilities and mixing ratio. + + In this case, we assume that we can *either* sample from the original data + distribution with probability `m`, or sample from a reshaped distribution + that comes from rejection sampling on the original distribution. This + rejection sampling is done on a per-class basis, with `a_i` representing the + probability of accepting data from class `i`. 
+ + If we try to minimize the amount of data rejected, we get the following: + + M_max = max_i [ t_i / p_i ] + M_min = min_i [ t_i / p_i ] + + The desired probability of accepting data if it comes from class `i`: + + a_i = (t_i/p_i - m) / (M_max - m) + + The desired probability of pulling a data element from the original dataset, + rather than the filtered one: + + m = M_min + + See the docstring for `_calculate_acceptance_probs` for more details. + + Args: + initial_probs: A Tensor of the initial probability distribution, given or + estimated. + target_probs: A Tensor of the corresponding classes. + + Returns: + (A 1D Tensor with the per-class acceptance probabilities, the desired + probability of pull from the original distribution.) + """ + ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs) + max_ratio = math_ops.reduce_max(ratio_l) + min_ratio = math_ops.reduce_min(ratio_l) + + # Target prob to sample from original distribution. + m = min_ratio + + # TODO(joelshor): Simplify fraction, if possible. + a_i = (ratio_l - m) / (max_ratio - m) + return a_i, m -- GitLab From b1067116c6a2351f4c597a9391b21ad0f513565b Mon Sep 17 00:00:00 2001 From: joel-shor Date: Fri, 20 Apr 2018 14:27:30 +0300 Subject: [PATCH 063/434] [tf.data] Clean up resampler and update BUILD files. 
--- .../contrib/data/python/kernel_tests/BUILD | 6 ++- .../data/python/kernel_tests/resample_test.py | 32 +++++---------- tensorflow/contrib/data/python/ops/BUILD | 2 + .../contrib/data/python/ops/resampling.py | 40 ++++--------------- 4 files changed, 23 insertions(+), 57 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index b15b9663f4..a6b46b37e7 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -308,13 +308,17 @@ py_test( srcs_version = "PY2AND3", tags = ["noasan"], deps = [ + "//third_party/py/absl/testing:parameterized", + "//third_party/py/numpy", "//tensorflow/contrib/data/python/ops:resampling", "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", "//tensorflow/python:errors", + "//tensorflow/python:math_ops", + "//tensorflow/python:random_ops", "//tensorflow/python:string_ops", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 9e1273eba1..97c4b68cb6 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -60,10 +60,10 @@ def _time_resampling( class ResampleTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( - ('InitialnDistributionKnown', True, False), - ('InitialDistributionUnknown', False, False), - ('InitialDistributionKnownV2', True, True), - ('InitialDistributionUnknownV2', False, True)) + ("InitialnDistributionKnown", True, False), + ("InitialDistributionUnknown", False, False), + ("InitialDistributionKnownV2", True, True), + ("InitialDistributionUnknownV2", False, True)) def testDistribution(self, initial_known, use_v2): classes = np.random.randint(5, size=(20000,)) # Uniformly 
sampled target_dist = [0.9, 0.05, 0.05, 0.0, 0.0] @@ -95,7 +95,6 @@ class ResampleTest(test.TestCase, parameterized.TestCase): returned_dist = class_counts / total_returned self.assertAllClose(target_dist, returned_dist, atol=1e-2) - def testRandomClasses(self): init_dist = [0.25, 0.25, 0.25, 0.25] target_dist = [0.0, 0.0, 0.0, 1.0] @@ -135,11 +134,11 @@ class ResampleTest(test.TestCase, parameterized.TestCase): self.assertAllClose(target_dist, bincount, atol=1e-2) @parameterized.named_parameters( - ('InitialnDistributionKnown', True, False), - ('InitialDistributionUnknown', False, False), - ('InitialDistributionKnownV2', True, True), - ('InitialDistributionUnknownV2', False, True)) - def _testNewResampleIsFaster(self, target_dist, num_to_sample): + ("SmallSkewManySamples", [0.1, 0.1, 0.1, 0.7], 1000), + ("BigSkewManySamples", [0.01, 0.01, 0.01, 0.97], 1000), + ("SmallSkewFewSamples", [0.1, 0.1, 0.1, 0.7], 100), + ("BigSkewFewSamples", [0.01, 0.01, 0.01, 0.97], 100)) + def testNewResampleIsFaster(self, target_dist, num_to_sample): init_dist = [0.25, 0.25, 0.25, 0.25] num_classes = len(init_dist) num_samples = 1000 @@ -153,19 +152,6 @@ class ResampleTest(test.TestCase, parameterized.TestCase): self.assertLess(fast_time, slow_time) - def testNewResampleIsFasterSmallSkewManySamples(self): - self._testNewResampleIsFaster([0.1, 0.1, 0.1, 0.7], 1000) - - def testNewResampleIsFasterBigSkewManySamples(self): - self._testNewResampleIsFaster([0.01, 0.01, 0.01, 0.97], 1000) - - def testNewResampleIsFasterSmallSkewFewSamples(self): - self._testNewResampleIsFaster([0.1, 0.1, 0.1, 0.7], 100) - - def testNewResampleIsFasterBigSkewFewSamples(self): - self._testNewResampleIsFaster([0.01, 0.01, 0.01, 0.97], 100) - - class MapDatasetBenchmark(test.Benchmark): def benchmarkResamplePerformance(self): diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index e00f2304cc..8cb4fa7f14 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ 
b/tensorflow/contrib/data/python/ops/BUILD @@ -193,7 +193,9 @@ py_library( srcs_version = "PY2AND3", deps = [ ":batching", + ":interleave_ops", ":scan_ops", + "//third_party/py/numpy", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index 94e28b9a2d..16d851bf96 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -82,8 +82,12 @@ def rejection_resample_v2(class_func, target_dist, initial_dist=None, """A transformation that resamples a dataset to achieve a target distribution. This differs from v1 in that it will also sample from the original dataset - with some probability, so it makes strictly fewer data rejections. This - transformation is faster than the original. + with some probability, so it makes strictly fewer data rejections. Due to an + implementation detail it must initialize a separate dataset initializer, so + the dataset becomes stateful after this transformation is applied + (`make_one_shot_iterator` won't work; users must use + `make_initializable_iterator`). This transformation is faster than the + original, except for overhead. **NOTE** Resampling is performed via rejection sampling; some fraction of the input values will be dropped. @@ -142,36 +146,6 @@ def rejection_resample_v2(class_func, target_dist, initial_dist=None, return _apply_fn -def _random_interleave_datasets(ds1, ds1_classes, ds2, prob_of_ds1, seed=None): - """Randomly interleave datasets. - - We carefully combine `ds1` and 'ds2' so that we don't needlessly compute the - filtering. - - Args: - ds1: A dataset to interleave. - ds1_classes: Dataset of class values associated with ds1. - ds2: Another dataset to interleave. - prob_of_ds1: A dataset of probabilities. Each probability represents the - likelihood of drawing from `ds1`. - seed: (Optional.) 
Python integer seed for the resampler. - - Returns: - A single dataset, combined from `ds1` and `ds2`. - """ - num_filtered_to_prefetch = 3 - ds2 = ds2.prefetch(num_filtered_to_prefetch) - filtered_iterator = ds2.make_one_shot_iterator() - combined_ds = dataset_ops.Dataset.zip( - (ds1_classes, ds1, prob_of_ds1)).map( - lambda ds1_class, original_data, prob_of_original: - control_flow_ops.cond( - random_ops.random_uniform([], seed=seed) < prob_of_original, - lambda: (ds1_class, original_data), - filtered_iterator.get_next)) - return combined_ds - - def _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, class_values_ds, seed): """Filters a dataset based on per-class acceptance probabilities. @@ -358,4 +332,4 @@ def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs): # TODO(joelshor): Simplify fraction, if possible. a_i = (ratio_l - m) / (max_ratio - m) - return a_i, m + return a_i, m \ No newline at end of file -- GitLab From 0cba8b7c66bead25ed2e6e1c6bf5a23d6cbe9557 Mon Sep 17 00:00:00 2001 From: joel-shor Date: Fri, 20 Apr 2018 14:44:47 +0300 Subject: [PATCH 064/434] [tf.data] Fix `absl` build rule. 
--- tensorflow/contrib/data/python/kernel_tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index a6b46b37e7..f90b17e79e 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -308,7 +308,6 @@ py_test( srcs_version = "PY2AND3", tags = ["noasan"], deps = [ - "//third_party/py/absl/testing:parameterized", "//third_party/py/numpy", "//tensorflow/contrib/data/python/ops:resampling", "//tensorflow/python:client_testlib", @@ -319,6 +318,7 @@ py_test( "//tensorflow/python:string_ops", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", + "@absl_py//absl/testing:parameterized", ], ) -- GitLab From 8cc506f8f6c3e9071069ede1cd5c91a9f3da7c11 Mon Sep 17 00:00:00 2001 From: joel-shor Date: Fri, 20 Apr 2018 15:00:02 +0300 Subject: [PATCH 065/434] [tf.data] Reorder BUILD rule deps and add `xrange` from `six`. 
--- tensorflow/contrib/data/python/kernel_tests/BUILD | 2 +- tensorflow/contrib/data/python/kernel_tests/resample_test.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index f90b17e79e..92c6967933 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -308,7 +308,6 @@ py_test( srcs_version = "PY2AND3", tags = ["noasan"], deps = [ - "//third_party/py/numpy", "//tensorflow/contrib/data/python/ops:resampling", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", @@ -318,6 +317,7 @@ py_test( "//tensorflow/python:string_ops", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", + "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 97c4b68cb6..7f007fede8 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin import time from absl.testing import parameterized -- GitLab From a10708db0d587831cafcb2e7dbdcbbcf11aede95 Mon Sep 17 00:00:00 2001 From: joel-shor Date: Fri, 20 Apr 2018 15:09:50 +0300 Subject: [PATCH 066/434] [tf.data] Second reorder BUILD rule deps. 
--- tensorflow/contrib/data/python/ops/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 8cb4fa7f14..d9a5502508 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -195,7 +195,6 @@ py_library( ":batching", ":interleave_ops", ":scan_ops", - "//third_party/py/numpy", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", @@ -204,6 +203,7 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:random_ops", "//tensorflow/python/data/ops:dataset_ops", + "//third_party/py/numpy", ], ) -- GitLab From 0c03255aa5f4b37de97e0685ffa15888fc16e4b3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 06:36:56 -0700 Subject: [PATCH 067/434] internal change PiperOrigin-RevId: 193659701 --- .../lite/toco/graph_transformations/propagate_fixed_sizes.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index b34aca1f09..ba244cf5ef 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -1516,10 +1516,7 @@ void ProcessArgMaxOperator(Model* model, ArgMaxOperator* op) { return; } - // The current ArgMax implementation only supports 4-dimensional inputs with - // the last dimension as the axis to perform ArgMax for. const std::vector& input_dims = input_array.shape().dims(); - CHECK_EQ(input_dims.size(), 4); std::vector output_dims; output_dims.reserve(input_dims.size() - 1); -- GitLab From c212d5542bb666b613a8567338983288a3ab15f4 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 20 Apr 2018 08:08:01 -0700 Subject: [PATCH 068/434] Eliminate the guard around Winograd non-fused convolutions with cudnn7. PiperOrigin-RevId: 193669636 --- .../fused_conv2d_bias_activation_op.cc | 3 +- .../core/kernels/conv_grad_filter_ops.cc | 3 +- .../core/kernels/conv_grad_input_ops.cc | 3 +- tensorflow/core/kernels/conv_grad_ops_3d.cc | 8 +++-- tensorflow/core/kernels/conv_ops.cc | 3 +- tensorflow/core/kernels/conv_ops_3d.cc | 4 ++- tensorflow/core/kernels/conv_ops_gpu.h | 35 +++++++++++++------ tensorflow/core/kernels/conv_ops_test.cc | 26 +++++++++----- 8 files changed, 59 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc index 0e06575d96..1e8f011b5d 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc @@ -543,7 +543,8 @@ void LaunchFusedConv2DBiasActivationOp:: fused_conv_parameters, &algorithm_config)) { std::vector algorithms; CHECK(stream->parent()->GetConvolveAlgorithms( - fused_conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), + fused_conv_parameters.ShouldIncludeWinogradNonfusedAlgo( + stream->parent()), &algorithms)); dnn::ProfileResult best_result; dnn::ProfileResult best_result_no_scratch; diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index 66ee474ca3..f3b91494b9 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -912,7 +912,8 @@ void LaunchConv2DBackpropFilterOp::operator()( conv_parameters, &algorithm_config)) { std::vector algorithms; CHECK(stream->parent()->GetConvolveBackwardFilterAlgorithms( - conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); + conv_parameters.ShouldIncludeWinogradNonfusedAlgo(stream->parent()), 
+ &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; for (auto profile_algorithm : algorithms) { diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 71ea0d5d72..66d15c6e78 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -961,7 +961,8 @@ void LaunchConv2DBackpropInputOp::operator()( conv_parameters, &algorithm_config)) { std::vector algorithms; CHECK(stream->parent()->GetConvolveBackwardDataAlgorithms( - conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); + conv_parameters.ShouldIncludeWinogradNonfusedAlgo(stream->parent()), + &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; for (auto profile_algorithm : algorithms) { diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 3650ab53b2..1234997bc5 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -662,7 +662,9 @@ class Conv3DBackpropInputOp : public OpKernel { conv_parameters, &algorithm_config)) { std::vector algorithms; CHECK(stream->parent()->GetConvolveBackwardDataAlgorithms( - conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); + conv_parameters.ShouldIncludeWinogradNonfusedAlgo( + stream->parent()), + &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; for (auto profile_algorithm : algorithms) { @@ -1029,7 +1031,9 @@ class Conv3DBackpropFilterOp : public OpKernel { conv_parameters, &algorithm_config)) { std::vector algorithms; CHECK(stream->parent()->GetConvolveBackwardFilterAlgorithms( - conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); + conv_parameters.ShouldIncludeWinogradNonfusedAlgo( + stream->parent()), + &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; for (auto profile_algorithm : algorithms) { 
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index 88843e4da7..f0888c655f 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -710,7 +710,8 @@ void LaunchConv2DOp::operator()( !AutoTuneConv::GetInstance()->Find(conv_parameters, &algorithm_config)) { std::vector algorithms; CHECK(stream->parent()->GetConvolveAlgorithms( - conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); + conv_parameters.ShouldIncludeWinogradNonfusedAlgo(stream->parent()), + &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; for (auto profile_algorithm : algorithms) { diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 21c84b2a0e..0b7c1524e6 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -396,7 +396,9 @@ struct LaunchConvOp { conv_parameters, &algorithm_config)) { std::vector algorithms; CHECK(stream->parent()->GetConvolveAlgorithms( - conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); + conv_parameters.ShouldIncludeWinogradNonfusedAlgo( + stream->parent()), + &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; for (auto profile_algorithm : algorithms) { diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h index f0085be3a5..7f9cfec981 100644 --- a/tensorflow/core/kernels/conv_ops_gpu.h +++ b/tensorflow/core/kernels/conv_ops_gpu.h @@ -137,20 +137,18 @@ class ConvParameters { // clang-format on } - // TODO(yangzihao): The purpose of this function is to disable winograd - // nonfused conv algorithm for certain input parameters so as to avoid a bug - // in cuDNNv5 and cuDNNv6. Remove this once switch to cuDNNv7. + // The purpose of this function is to disable winograd nonfused conv algorithm + // for certain input parameters so as to avoid a bug in cuDNNv5 and cuDNNv6. 
template - bool ShouldIncludeWinogradNonfusedAlgo() const { - int64 total_size = 16 * std::ceil(batch_ / 16.0) * - std::max(in_depths_, out_depths_) * in_[0] * in_[1] * - sizeof(T); - int64 threshold = 1LL << 31; - if (total_size >= threshold) { - return false; - } else { + bool ShouldIncludeWinogradNonfusedAlgo( + perftools::gputools::StreamExecutor* stream_exec) const { + // Skip this check for cuDNN 7 and newer. + perftools::gputools::port::StatusOr> version = + stream_exec->AsDnn()->GetVersion(); + if (version.ok() && std::get<0>(version.ValueOrDie()) >= 7) { return true; } + return ShouldIncludeWinogradNonfusedAlgoPreCudnn7(); } protected: @@ -166,6 +164,21 @@ class ConvParameters { uint64 hash_code_; private: + friend struct ConvParametersPeer; // For testing purposes. + + template + bool ShouldIncludeWinogradNonfusedAlgoPreCudnn7() const { + int64 total_size = 16 * std::ceil(batch_ / 16.0) * + std::max(in_depths_, out_depths_) * in_[0] * in_[1] * + sizeof(T); + int64 threshold = 1LL << 31; + if (total_size >= threshold) { + return false; + } else { + return true; + } + } + int64 batch_; int64 in_depths_; int64 out_depths_; diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc index e2e166c02f..8afe6a2cbd 100644 --- a/tensorflow/core/kernels/conv_ops_test.cc +++ b/tensorflow/core/kernels/conv_ops_test.cc @@ -22,20 +22,28 @@ limitations under the License. 
#include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/conv_ops_gpu.h" #include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/public/session.h" -#include "tensorflow/core/kernels/conv_ops_gpu.h" - namespace tensorflow { #if GOOGLE_CUDA +struct ConvParametersPeer { + template + bool ShouldIncludeWinogradNonfusedAlgoPreCudnn7() { + return params.ShouldIncludeWinogradNonfusedAlgoPreCudnn7(); + } + + ConvParameters params; +}; + TEST(ConvParameters, WinogradNonfusedAlgoSize) { - ConvParameters conv_params_small = { + ConvParametersPeer conv_params_small = {{ 1, // batch 32, // in_depths {{300, // in_rows @@ -51,10 +59,11 @@ TEST(ConvParameters, WinogradNonfusedAlgoSize) { 0}}, // padding_cols DT_FLOAT, // tensor datatype 0, // device_id - }; - EXPECT_TRUE(conv_params_small.ShouldIncludeWinogradNonfusedAlgo()); + }}; + EXPECT_TRUE( + conv_params_small.ShouldIncludeWinogradNonfusedAlgoPreCudnn7()); - ConvParameters conv_params_large = { + ConvParametersPeer conv_params_large = {{ 1, // batch 128, // in_depths {{300, // in_rows @@ -70,8 +79,9 @@ TEST(ConvParameters, WinogradNonfusedAlgoSize) { 0}}, // padding_cols DT_FLOAT, // tensor datatype 0, // device_id - }; - EXPECT_FALSE(conv_params_large.ShouldIncludeWinogradNonfusedAlgo()); + }}; + EXPECT_FALSE( + conv_params_large.ShouldIncludeWinogradNonfusedAlgoPreCudnn7()); } #endif // GOOGLE_CUDA -- GitLab From 3e20fee5810796f70713122d235176b9c022ef41 Mon Sep 17 00:00:00 2001 From: Junpeng Lao Date: Fri, 20 Apr 2018 18:05:52 +0200 Subject: [PATCH 069/434] Address comments from @srvasude --- .../kernel_tests/bijectors/ordered_test.py | 32 +++++++++++-------- .../python/ops/bijectors/ordered.py | 21 ++++++++---- 2 files changed, 32 
insertions(+), 21 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py index 63c8f1fb31..721dba9c3a 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/ordered_test.py @@ -1,4 +1,4 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,33 +23,36 @@ import numpy as np from tensorflow.contrib.distributions.python.ops.bijectors.ordered import Ordered from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops.distributions.bijector_test_util import assert_bijective_and_finite from tensorflow.python.platform import test -rng = np.random.RandomState(42) - class OrderedBijectorTest(test.TestCase): """Tests correctness of the ordered transformation.""" + def setUp(self): + self._rng = np.random.RandomState(42) + + @test_util.run_in_graph_and_eager_modes() def testBijectorVector(self): with self.test_session(): ordered = Ordered() self.assertEqual("ordered", ordered.name) x = np.asarray([[2., 3, 4], [4., 8, 13]]) y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]] - self.assertAllClose(y, ordered.forward(x).eval()) - self.assertAllClose(x, ordered.inverse(y).eval()) + self.assertAllClose(y, self.evaluate(ordered.forward(x))) + self.assertAllClose(x, self.evaluate(ordered.inverse(y))) self.assertAllClose( np.sum(np.asarray(y)[..., 1:], axis=-1), - ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(), + self.evaluate(ordered.inverse_log_det_jacobian(y, 
event_ndims=1)), atol=0., rtol=1e-7) self.assertAllClose( - -ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(), - ordered.forward_log_det_jacobian(x, event_ndims=1).eval(), + self.evaluate(-ordered.inverse_log_det_jacobian(y, event_ndims=1)), + self.evaluate(ordered.forward_log_det_jacobian(x, event_ndims=1)), atol=0., rtol=1e-7) @@ -79,6 +82,7 @@ class OrderedBijectorTest(test.TestCase): atol=0., rtol=1e-7) + @test_util.run_in_graph_and_eager_modes() def testShapeGetters(self): with self.test_session(): x = tensor_shape.TensorShape([4]) @@ -86,18 +90,18 @@ class OrderedBijectorTest(test.TestCase): bijector = Ordered(validate_args=True) self.assertAllEqual(y, bijector.forward_event_shape(x)) self.assertAllEqual(y.as_list(), - bijector.forward_event_shape_tensor( - x.as_list()).eval()) + self.evaluate(bijector.forward_event_shape_tensor( + x.as_list()))) self.assertAllEqual(x, bijector.inverse_event_shape(y)) self.assertAllEqual(x.as_list(), - bijector.inverse_event_shape_tensor( - y.as_list()).eval()) + self.evaluate(bijector.inverse_event_shape_tensor( + y.as_list()))) def testBijectiveAndFinite(self): with self.test_session(): ordered = Ordered() - x = np.sort(rng.randn(3, 10), axis=-1).astype(np.float32) - y = (rng.randn(3, 10)).astype(np.float32) + x = np.sort(self._rng.randn(3, 10), axis=-1).astype(np.float32) + y = (self._rng.randn(3, 10)).astype(np.float32) assert_bijective_and_finite(ordered, x, y, event_ndims=1) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py b/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py index b2959cce31..46fec0562c 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py @@ -36,6 +36,8 @@ class Ordered(bijector.Bijector): """Bijector which maps a tensor x_k that has increasing elements in the last dimension to an unconstrained tensor y_k. 
+ Both the domain and the codomain of the mapping are [-inf, inf]; however, + the input of the forward mapping must be strictly increasing. The inverse of the bijector applied to a normal random vector `y ~ N(0, 1)` gives back a sorted random vector with the same distribution `x ~ N(0, 1)` where `x = sort(y)` @@ -55,11 +57,7 @@ class Ordered(bijector.Bijector): ``` """ - def __init__(self, - validate_args=False, - name="ordered"): - self._graph_parents = [] - self._name = name + def __init__(self, validate_args=False, name="ordered"): super(Ordered, self).__init__( forward_min_event_ndims=1, validate_args=validate_args, @@ -90,21 +88,30 @@ class Ordered(bijector.Bijector): def _forward(self, x): x = self._maybe_assert_valid_x(x) - y0 = array_ops.expand_dims(x[..., 0], -1) + y0 = x[..., 0, array_ops.newaxis] yk = math_ops.log(x[..., 1:] - x[..., :-1]) y = array_ops.concat([y0, yk], axis=-1) return y def _inverse(self, y): - x0 = array_ops.expand_dims(y[..., 0], -1) + x0 = y[..., 0, array_ops.newaxis] xk = math_ops.exp(y[..., 1:]) x = array_ops.concat([x0, xk], axis=-1) return math_ops.cumsum(x, axis=-1) def _inverse_log_det_jacobian(self, y): + # The Jacobian of the inverse mapping is lower + # triangular, with the diagonal elements being: + # J[i,i] = 1 if i=1, and + # exp(y_i) if 1 Date: Fri, 20 Apr 2018 09:20:36 -0700 Subject: [PATCH 070/434] [TF:XLA] Bump open source llvm revision to r330313 PiperOrigin-RevId: 193678317 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index d7bd2a2be0..aeaf8d7a24 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -451,11 +451,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/3210e64b499a31193051208f2f8922dadfc4bb6f.tar.gz", -
"https://github.com/llvm-mirror/llvm/archive/3210e64b499a31193051208f2f8922dadfc4bb6f.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/c1e9b6f826c86c87a7e7173f1baf7e7df9f43e32.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/c1e9b6f826c86c87a7e7173f1baf7e7df9f43e32.tar.gz", ], - sha256 = "017d7db029cc175634d75416c326770139c76590575ed44a3794c11ab160c955", - strip_prefix = "llvm-3210e64b499a31193051208f2f8922dadfc4bb6f", + sha256 = "92b7c01074f694a77b4d664951d1ec071e30ef19c61e673158e95fbb6e447b54", + strip_prefix = "llvm-c1e9b6f826c86c87a7e7173f1baf7e7df9f43e32", build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) -- GitLab From d0e3e998376f5e7d59678e5d42f3497e52ca7622 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Fri, 20 Apr 2018 09:23:52 -0700 Subject: [PATCH 071/434] Fix msan error in MapAndBatchDataset. While checkpointing tensors in BatchResult.output save only the initialized slice. If the final batch is short, the entire batch tensor may not be initialized. PiperOrigin-RevId: 193678679 --- .../kernels/data/map_and_batch_dataset_op.cc | 44 +++++++++++++++---- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index b8105552a0..605ef3c0b7 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -331,7 +331,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { } CHECK_EQ(batch_results_.size(), batch_results_size); for (size_t i = 0; i < batch_results_size; ++i) { - TF_RETURN_IF_ERROR(ReadBatchResultLocked(reader, i)); + TF_RETURN_IF_ERROR(ReadBatchResultLocked(ctx, reader, i)); } return Status::OK(); } @@ -573,7 +573,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { // finish. 
This may delay saving a checkpoint by a bit but keeps the // code clean and also saves us from checkpointing the state of the // `BlockingCounter`. - batch_results_[index].counter->Wait(); + int64 num_elements = 0; + WaitForBatch(index, &num_elements).IgnoreError(); + const BatchResult& result = batch_results_[index]; string prefix = strings::StrCat("batch_results_", index); { @@ -587,14 +589,24 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { full_name(strings::StrCat(prefix, "_output_size")), result.output.size())); for (size_t i = 0; i < result.output.size(); i++) { - TF_RETURN_IF_ERROR(writer->WriteTensor( - full_name(strings::StrCat(prefix, "_output_", i)), - result.output[i])); + // If the batch is not full, we only store the first + // `num_elements` values. The rest of the batch tensor is + // *uninitialized* and accessing that will raise msan errors. + if (num_elements < dataset()->batch_size_) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + full_name(strings::StrCat(prefix, "_output_", i)), + result.output[i].Slice(0, num_elements))); + } else { + TF_RETURN_IF_ERROR(writer->WriteTensor( + full_name(strings::StrCat(prefix, "_output_", i)), + result.output[i])); + } } return Status::OK(); } - Status ReadBatchResultLocked(IteratorStateReader* reader, size_t index) + Status ReadBatchResultLocked(IteratorContext* ctx, + IteratorStateReader* reader, size_t index) EXCLUSIVE_LOCKS_REQUIRED(mu_) { BatchResult* result = &batch_results_[index]; string prefix = strings::StrCat("batch_results_", index); @@ -618,10 +630,24 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { } result->output.reserve(output_size); for (size_t i = 0; i < output_size; i++) { - result->output.emplace_back(); + Tensor t; TF_RETURN_IF_ERROR(reader->ReadTensor( - full_name(strings::StrCat(prefix, "_output_", i)), - &result->output.back())); + full_name(strings::StrCat(prefix, "_output_", i)), &t)); + // If the batch was not full, we may have stored only the relevant + // 
slice. Since tensors in `BatchResult.output` are expected to + // have the leading dimension of size batch_size, we build a larger + // tensor and copy the slice read from the checkpoint into it. + if (t.dim_size(0) < dataset()->batch_size_) { + TensorShape component_shape(t.shape()); + component_shape.set_dim(0, dataset()->batch_size_); + AllocatorAttributes attr; + attr.set_gpu_compatible(true); + Tensor new_t(ctx->allocator(attr), t.dtype(), component_shape); + TF_RETURN_IF_ERROR(CopyPartialBatch(&new_t, t, t.dim_size(0))); + result->output.emplace_back(std::move(new_t)); + } else { + result->output.emplace_back(std::move(t)); + } } return Status::OK(); } -- GitLab From cd462f39e58674a43d1f8c156f23235722b2281e Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Fri, 20 Apr 2018 09:31:08 -0700 Subject: [PATCH 072/434] Don't delete inbound_nodes and outbound_nodes, these no longer exist. PiperOrigin-RevId: 193679512 --- tensorflow/tools/docs/generate.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/tools/docs/generate.py b/tensorflow/tools/docs/generate.py index c750539a76..fc93085e3e 100644 --- a/tensorflow/tools/docs/generate.py +++ b/tensorflow/tools/docs/generate.py @@ -43,10 +43,6 @@ if __name__ == '__main__': flags = doc_generator.parse_known_args() - # Suppress documentation of some symbols that users should never use. - del tf.layers.Layer.inbound_nodes - del tf.layers.Layer.outbound_nodes - # tf_debug is not imported with tf, it's a separate module altogether doc_generator.set_py_modules([('tf', tf), ('tfdbg', tf_debug)]) -- GitLab From fb23c0e166179ccf372203982d8fe79de441e360 Mon Sep 17 00:00:00 2001 From: James Keeling Date: Fri, 20 Apr 2018 09:54:50 -0700 Subject: [PATCH 073/434] Correct error in "Adding An Op" docs. The macro `REGISTER_KERNEL_BUILDER` always declared a functor specialized on floats, instead of the type actually passed into the macro. 
PiperOrigin-RevId: 193682519 --- tensorflow/docs_src/extend/adding_an_op.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md index 84da2165b5..c3795492ce 100644 --- a/tensorflow/docs_src/extend/adding_an_op.md +++ b/tensorflow/docs_src/extend/adding_an_op.md @@ -267,7 +267,7 @@ REGISTER_CPU(int32); #ifdef GOOGLE_CUDA #define REGISTER_GPU(T) \ /* Declare explicit instantiations in kernel_example.cu.cc. */ \ - extern template ExampleFunctor<GPUDevice, float>; \ + extern template ExampleFunctor<GPUDevice, T>; \ REGISTER_KERNEL_BUILDER( \ Name("Example").Device(DEVICE_GPU).TypeConstraint<T>("T"), \ ExampleOp<GPUDevice, T>); -- GitLab From a749a6b95932d6f7438a01a2f5fd661343ad536f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 10:16:03 -0700 Subject: [PATCH 074/434] Change the TF record reader to use 16MB buffering by default in order to improve performance. PiperOrigin-RevId: 193685521 --- tensorflow/python/lib/io/py_record_reader.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/lib/io/py_record_reader.cc b/tensorflow/python/lib/io/py_record_reader.cc index 5fcb51b3b2..9500fc6a7c 100644 --- a/tensorflow/python/lib/io/py_record_reader.cc +++ b/tensorflow/python/lib/io/py_record_reader.cc @@ -43,9 +43,10 @@ PyRecordReader* PyRecordReader::New(const string& filename, uint64 start_offset, reader->offset_ = start_offset; reader->file_ = file.release(); + static const uint64 kReaderBufferSize = 16 * 1024 * 1024; RecordReaderOptions options = RecordReaderOptions::CreateRecordReaderOptions(compression_type_string); - + options.buffer_size = kReaderBufferSize; reader->reader_ = new RecordReader(reader->file_, options); return reader; } -- GitLab From 729192823935156ae29d7f0d5f64c0bcd6034c7a Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 20 Apr 2018 10:32:24 -0700 Subject: [PATCH 075/434] Adding Shape inference functions to outfeed enqueue
ops. PiperOrigin-RevId: 193688099 --- tensorflow/contrib/tpu/ops/outfeed_ops.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/tpu/ops/outfeed_ops.cc b/tensorflow/contrib/tpu/ops/outfeed_ops.cc index 5900c61a38..b05c76ca64 100644 --- a/tensorflow/contrib/tpu/ops/outfeed_ops.cc +++ b/tensorflow/contrib/tpu/ops/outfeed_ops.cc @@ -26,6 +26,7 @@ REGISTER_OP("OutfeedEnqueue") .Input("input: dtype") .Attr("dtype: type") .SetIsStateful() + .SetShapeFn(shape_inference::NoOutputs) .Doc(R"doc( An op which emits a single Tensor value from an XLA computation. @@ -36,6 +37,7 @@ REGISTER_OP("OutfeedEnqueueTuple") .Input("inputs: dtypes") .Attr("dtypes: list(type)") .SetIsStateful() + .SetShapeFn(shape_inference::NoOutputs) .Doc(R"doc( An op which emits multiple Tensor values from an XLA computation. -- GitLab From da5a6d86b856001c03cccace5ac74fa8f045b6ae Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 10:34:49 -0700 Subject: [PATCH 076/434] Disable constant folding and arithmetic optimizations for functions. PiperOrigin-RevId: 193688466 --- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 22799311bc..cdc4698c34 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -243,6 +243,10 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, std::unordered_set optimized_funcs; bool optimize_function_library = true; + // TODO(ezhulenev): turn it on after fixing ranklab: tune_tf_test. 
+ cfg_.set_constant_folding(RewriterConfig::OFF); + cfg_.set_arithmetic_optimization(RewriterConfig::OFF); + while (optimize_function_library) { optimize_function_library = false; -- GitLab From a09c02a3ecc190da8fbae88bdc54505de5387645 Mon Sep 17 00:00:00 2001 From: Junpeng Lao Date: Fri, 20 Apr 2018 20:06:02 +0200 Subject: [PATCH 077/434] minor code styling --- .../contrib/distributions/python/ops/bijectors/ordered.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py b/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py index 46fec0562c..a180f1df0c 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/ordered.py @@ -46,7 +46,7 @@ class Ordered(bijector.Bijector): `y[0] = x[0]` `y[1:] = math_ops.log(x[1:] - x[:-1])` - Example Use: + #### Example Use: ```python bijector.Ordered().forward([2, 3, 4]) -- GitLab From b3f379e907259aa166c1ef734ccfd03331eb0a94 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 20 Apr 2018 11:10:56 -0700 Subject: [PATCH 078/434] [XLA:CPU] Use Eigen for F64 dot operations PiperOrigin-RevId: 193694613 --- tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc | 3 ++- tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 29afd8ea5f..495fecc4aa 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -1070,7 +1070,8 @@ static bool AreValidGemmShapes(const Shape& lhs_shape, const Shape& rhs_shape, // 1) be matrices with no padding, and // 2) have an allowed element type. 
PrimitiveType output_primitive_type = output_shape.element_type(); - return (output_primitive_type == F32 || output_primitive_type == F16) && + return (output_primitive_type == F64 || output_primitive_type == F32 || + output_primitive_type == F16) && IsRank2WithNoPadding(lhs_shape) && IsRank2WithNoPadding(rhs_shape) && IsRank2WithNoPadding(output_shape); } diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 3405277d44..f990ee2785 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -2076,7 +2076,7 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { TF_RETURN_IF_ERROR(ElementTypesSameAndSupported( /*instruction=*/*root, /*operands=*/{lhs, rhs}, - /*supported_types=*/{F16, F32})); + /*supported_types=*/{F16, F32, F64})); llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs)); llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs)); -- GitLab From 49f3469d9533cb12d06ed3907b4ced975e2fcea4 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 20 Apr 2018 11:13:16 -0700 Subject: [PATCH 079/434] Use CreateWorkerSession and DeleteWorkerSession for all distributed sessions. This change adds a phase to the session creation protocol: the master now contacts all workers to register a session handle and create a "WorkerSession" on each worker before it first registers or runs a graph on any worker. Subsequent requests to a worker ensure that the worker has the session handle registered before performing the request, and an AbortedError is raised if the worker has not (e.g. because it restarted after a failure). As a result, more failure cases are covered by the high-level APIs (tf.estimator, Slim, etc.) that recreate the session on receiving an AbortedError. 
Previously, there was a possible race condition in which a PS task could restart between variable initialization and the first step, leading to a FailedPreconditionError ("Attempting to use uninitialized value") that would not be handled by the high-level APIs. PiperOrigin-RevId: 193694958 --- .../core/distributed_runtime/master_session.cc | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index ebe350d313..1c67b42e76 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -89,6 +89,10 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { ~ReffedClientGraph() override { if (should_deregister_) { DeregisterPartitions(); + } else { + for (Part& part : partitions_) { + worker_cache_->ReleaseWorker(part.name, part.worker); + } } } @@ -1174,14 +1178,8 @@ Status MasterSession::Create(GraphDef* graph_def, TF_RETURN_IF_ERROR(GraphExecutionState::MakeForBaseGraph( graph_def, execution_options, &execution_state_)); } - // TODO(b/36574172): Remove these conditions when ClusterSpec - // propagation is supported in all servers. - if (options.cluster_def != nullptr || - session_opts_.config.isolate_session_state()) { - should_delete_worker_sessions_ = true; - return CreateWorkerSessions(options); - } - return Status::OK(); + should_delete_worker_sessions_ = true; + return CreateWorkerSessions(options); } Status MasterSession::CreateWorkerSessions( -- GitLab From 570d90b9c7e6a19bc2606fdaf7ad0f85b8590c0e Mon Sep 17 00:00:00 2001 From: akindyakov Date: Fri, 20 Apr 2018 11:23:15 -0700 Subject: [PATCH 080/434] Speed up safe_strtod and safe_strtof functions by using double-conversion library Closes #12102. 
PiperOrigin-RevId: 193696537 --- tensorflow/contrib/cmake/CMakeLists.txt | 4 + .../cmake/external/double_conversion.cmake | 54 ++++++++++++ tensorflow/contrib/makefile/Makefile | 8 +- .../contrib/makefile/download_dependencies.sh | 4 +- tensorflow/core/BUILD | 9 +- tensorflow/core/lib/strings/numbers.cc | 51 +++++++---- tensorflow/core/lib/strings/numbers.h | 2 + tensorflow/core/lib/strings/numbers_test.cc | 87 +++++++++++++++++++ tensorflow/core/lib/strings/str_util.cc | 8 ++ tensorflow/core/lib/strings/str_util.h | 5 ++ tensorflow/core/lib/strings/str_util_test.cc | 56 ++---------- tensorflow/tools/lib_package/BUILD | 2 + tensorflow/tools/pip_package/BUILD | 1 + tensorflow/workspace.bzl | 10 +++ third_party/double_conversion.BUILD | 38 ++++++++ 15 files changed, 270 insertions(+), 69 deletions(-) create mode 100644 tensorflow/contrib/cmake/external/double_conversion.cmake create mode 100644 third_party/double_conversion.BUILD diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 23b31ae1dc..bdf3e98635 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -193,6 +193,7 @@ include(protobuf) include(re2) include(cub) include(sqlite) +include(double_conversion) if (tensorflow_BUILD_CC_TESTS) include(googletest) endif() @@ -213,6 +214,7 @@ set(tensorflow_EXTERNAL_LIBRARIES ${protobuf_STATIC_LIBRARIES} ${re2_STATIC_LIBRARIES} ${sqlite_STATIC_LIBRARIES} + ${double_conversion_STATIC_LIBRARIES} ) if (systemlib_ZLIB) @@ -240,6 +242,7 @@ set(tensorflow_EXTERNAL_DEPENDENCIES fft2d re2 sqlite_copy_headers_to_destination + double_conversion ) include_directories( @@ -262,6 +265,7 @@ include_directories( ${PROTOBUF_INCLUDE_DIRS} ${re2_INCLUDE_DIR} ${sqlite_INCLUDE_DIR} + ${double_conversion_INCLUDE_DIR} ) if(tensorflow_ENABLE_SSL_SUPPORT) diff --git a/tensorflow/contrib/cmake/external/double_conversion.cmake b/tensorflow/contrib/cmake/external/double_conversion.cmake new file mode 
100644 index 0000000000..527ccdc8d8 --- /dev/null +++ b/tensorflow/contrib/cmake/external/double_conversion.cmake @@ -0,0 +1,54 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +include (ExternalProject) + +set(double_conversion_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/double_conversion/src/double_conversion) +set(double_conversion_URL https://github.com/google/double-conversion.git) +set(double_conversion_TAG 5664746) +set(double_conversion_BUILD ${double_conversion_INCLUDE_DIR}) +set(double_conversion_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.so) +set(double_conversion_INCLUDES ${double_conversion_BUILD}) + +if(WIN32) + set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/$(Configuration)/double-conversion.lib) +else() + set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.a) +endif() + +set(double_conversion_HEADERS + "${double_conversion_INCLUDE_DIR}/double-conversion/bignum-dtoa.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/cached-powers.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/double-conversion.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/fixed-dtoa.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/strtod.h" + 
"${double_conversion_INCLUDE_DIR}/double-conversion/bignum.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/diy-fp.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/fast-dtoa.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/ieee.h" + "${double_conversion_INCLUDE_DIR}/double-conversion/utils.h" +) + +ExternalProject_Add(double_conversion + PREFIX double_conversion + GIT_REPOSITORY ${double_conversion_URL} + GIT_TAG ${double_conversion_TAG} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 + INSTALL_COMMAND "" + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON +) diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 05e8d9064b..1a1ab54a53 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -89,6 +89,7 @@ HOST_INCLUDES := \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ -I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(MAKEFILE_DIR)/downloads/fft2d \ +-I$(MAKEFILE_DIR)/downloads/double_conversion \ -I$(HOST_GENDIR) ifeq ($(HAS_GEN_HOST_PROTOC),true) HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include @@ -125,7 +126,9 @@ PROTO_TEXT := $(HOST_BINDIR)proto_text # The list of dependencies is derived from the Bazel build file by running # the gen_file_lists.sh script on a system with a working Bazel setup. PROTO_TEXT_CC_FILES := $(shell cat $(MAKEFILE_DIR)/proto_text_cc_files.txt) -PROTO_TEXT_PB_CC_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt) +PROTO_TEXT_PB_CC_LIST := \ + $(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt) \ + $(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc) PROTO_TEXT_PB_H_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_h_files.txt) # Locations of the intermediate files proto_text generates. 
@@ -171,6 +174,7 @@ INCLUDES := \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ -I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(MAKEFILE_DIR)/downloads/fft2d \ +-I$(MAKEFILE_DIR)/downloads/double_conversion \ -I$(PROTOGENDIR) \ -I$(PBTGENDIR) ifeq ($(HAS_GEN_HOST_PROTOC),true) @@ -326,6 +330,7 @@ $(MARCH_OPTION) \ -I$(MAKEFILE_DIR)/downloads/gemmlowp \ -I$(MAKEFILE_DIR)/downloads/nsync/public \ -I$(MAKEFILE_DIR)/downloads/fft2d \ +-I$(MAKEFILE_DIR)/downloads/double_conversion \ -I$(MAKEFILE_DIR)/gen/protobuf_android/$(ANDROID_ARCH)/include \ -I$(PROTOGENDIR) \ -I$(PBTGENDIR) @@ -603,6 +608,7 @@ $(wildcard tensorflow/core/platform/*/*.cc) \ $(wildcard tensorflow/core/platform/*/*/*.cc) \ $(wildcard tensorflow/core/util/*.cc) \ $(wildcard tensorflow/core/util/*/*.cc) \ +$(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc) \ tensorflow/core/util/version_info.cc # Remove duplicates (for version_info.cc) CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index 8b415e6527..48953e2e38 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -32,7 +32,8 @@ GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.g NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" -FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" +FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" +DOUBLE_CONVERSION_URL="$(grep -o "https.*google/double-conversion.*\.zip" "${BZL_FILE_PATH}" | 
head -n1)" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" CUB_URL="$(grep -o 'https.*cub/archive.*zip' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" @@ -87,6 +88,7 @@ download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync" download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf" download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2" download_and_extract "${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d" +download_and_extract "${DOUBLE_CONVERSION_URL}" "${DOWNLOADS_DIR}/double_conversion" download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl" download_and_extract "${CUB_URL}" "${DOWNLOADS_DIR}/cub/external/cub_archive" diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index c15e7de186..5b04574a4f 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -337,7 +337,9 @@ cc_library( "lib/bfloat16/bfloat16.h", ] + tf_additional_proto_hdrs() + glob(tf_env_time_hdrs()), copts = tf_copts(), - deps = tf_lib_proto_parsing_deps(), + deps = tf_lib_proto_parsing_deps() + [ + "@double_conversion//:double-conversion", + ], ) # This build rule (along with :lib_internal, :framework, and @@ -1231,6 +1233,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", "@protobuf_archive//:protobuf", ], @@ -1270,6 +1273,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", "@protobuf_archive//:protobuf", ], @@ -1333,6 +1337,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", "@protobuf_archive//:protobuf", ], @@ -1355,6 +1360,7 @@ cc_library( deps = [ ":protos_all_cc_impl", "//third_party/eigen3", + "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", "@protobuf_archive//:protobuf", ], @@ -1751,6 +1757,7 @@ cc_library( 
"//tensorflow/core/platform/default/build_config:platformlib", "@snappy", "@zlib_archive//:zlib", + "@double_conversion//:double-conversion", "@protobuf_archive//:protobuf", ] + tf_protos_all_impl() + tf_protos_grappler_impl(), ) diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc index c296daa95d..e4b909296e 100644 --- a/tensorflow/core/lib/strings/numbers.cc +++ b/tensorflow/core/lib/strings/numbers.cc @@ -23,6 +23,8 @@ limitations under the License. #include #include +#include "double-conversion/double-conversion.h" + #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" @@ -110,6 +112,17 @@ T locale_independent_strtonum(const char* str, const char** endptr) { return result; } +static inline const double_conversion::StringToDoubleConverter& +StringToFloatConverter() { + static const double_conversion::StringToDoubleConverter converter( + double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES | + double_conversion::StringToDoubleConverter::ALLOW_HEX | + double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES | + double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY, + 0., 0., "inf", "nan"); + return converter; +} + } // namespace namespace strings { @@ -319,25 +332,31 @@ bool safe_strtou32(StringPiece str, uint32* value) { } bool safe_strtof(const char* str, float* value) { - const char* endptr; - *value = locale_independent_strtonum(str, &endptr); - while (isspace(*endptr)) ++endptr; - // Ignore range errors from strtod/strtof. - // The values it returns on underflow and - // overflow are the right fallback in a - // robust setting. - return *str != '\0' && *endptr == '\0'; + int processed_characters_count = -1; + auto len = str_util::Strnlen(str, kFastToBufferSize); + + // If there is no zero-termination in str, fail. 
+ if (len == kFastToBufferSize) return false; + // If string length exceeds int max, fail. + if (len > std::numeric_limits<int>::max()) return false; + + *value = StringToFloatConverter().StringToFloat(str, static_cast<int>(len), + &processed_characters_count); + return processed_characters_count > 0; } bool safe_strtod(const char* str, double* value) { - const char* endptr; - *value = locale_independent_strtonum<double>(str, &endptr); - while (isspace(*endptr)) ++endptr; - // Ignore range errors from strtod/strtof. - // The values it returns on underflow and - // overflow are the right fallback in a - // robust setting. - return *str != '\0' && *endptr == '\0'; + int processed_characters_count = -1; + auto len = str_util::Strnlen(str, kFastToBufferSize); + + // If there is no zero-termination in str, fail. + if (len == kFastToBufferSize) return false; + // If string length exceeds int max, fail. + if (len > std::numeric_limits<int>::max()) return false; + + *value = StringToFloatConverter().StringToDouble(str, static_cast<int>(len), + &processed_characters_count); + return processed_characters_count > 0; } size_t FloatToBuffer(float value, char* buffer) { diff --git a/tensorflow/core/lib/strings/numbers.h b/tensorflow/core/lib/strings/numbers.h index 6b7703be37..e9add42849 100644 --- a/tensorflow/core/lib/strings/numbers.h +++ b/tensorflow/core/lib/strings/numbers.h @@ -114,11 +114,13 @@ bool safe_strtou64(StringPiece str, uint64* value); // Convert strings to floating point values. // Leading and trailing spaces are allowed. // Values may be rounded on over- and underflow. +// Returns false on invalid input or if `strlen(str) >= kFastToBufferSize`. bool safe_strtof(const char* str, float* value); // Convert strings to double precision floating point values. // Leading and trailing spaces are allowed. // Values may be rounded on over- and underflow. +// Returns false on invalid input or if `strlen(str) >= kFastToBufferSize`.
bool safe_strtod(const char* str, double* value); inline bool ProtoParseNumeric(StringPiece s, int32* value) { diff --git a/tensorflow/core/lib/strings/numbers_test.cc b/tensorflow/core/lib/strings/numbers_test.cc index e15161de66..0f22dac262 100644 --- a/tensorflow/core/lib/strings/numbers_test.cc +++ b/tensorflow/core/lib/strings/numbers_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/numbers.h" +#include #include #include "tensorflow/core/platform/test.h" @@ -277,7 +278,49 @@ TEST(safe_strtof, Float) { EXPECT_TRUE(safe_strtof("-0x2A", &result)); EXPECT_EQ(-42.0f, result); + EXPECT_TRUE(safe_strtof(" -0x2", &result)); + EXPECT_EQ(-2.0f, result); + + EXPECT_TRUE(safe_strtof("8 \t", &result)); + EXPECT_EQ(8.0f, result); + + EXPECT_TRUE(safe_strtof("\t20.0\t ", &result)); + EXPECT_EQ(20.0f, result); + EXPECT_FALSE(safe_strtof("-infinity is awesome", &result)); + + // Make sure we exit cleanly if the string is not terminated + char test_str[2 * kFastToBufferSize]; + for (int i = 0; i < 2 * kFastToBufferSize; ++i) test_str[i] = 'a'; + EXPECT_FALSE(safe_strtof(test_str, &result)); + + // Make sure we exit cleanly if the string is too long + test_str[kFastToBufferSize + 1] = '\0'; + EXPECT_FALSE(safe_strtof(test_str, &result)); + + EXPECT_TRUE(safe_strtof("-inf", &result)); + EXPECT_EQ(-std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtof("+inf", &result)); + EXPECT_EQ(std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtof("InF", &result)); + EXPECT_EQ(std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtof("-INF", &result)); + EXPECT_EQ(-std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtof("nan", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtof("-nan", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtof("-NaN", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtof("+NAN", &result)); + 
EXPECT_TRUE(std::isnan(result)); } TEST(safe_strtod, Double) { @@ -287,6 +330,15 @@ TEST(safe_strtod, Double) { EXPECT_EQ(0.1234567890123, result); EXPECT_FALSE(safe_strtod("0.1234567890123abc", &result)); + // Make sure we exit cleanly if the string is not terminated + char test_str[2 * kFastToBufferSize]; + for (int i = 0; i < 2 * kFastToBufferSize; ++i) test_str[i] = 'a'; + EXPECT_FALSE(safe_strtod(test_str, &result)); + + // Make sure we exit cleanly if the string is too long + test_str[kFastToBufferSize + 1] = '\0'; + EXPECT_FALSE(safe_strtod(test_str, &result)); + // Overflow to infinity, underflow to 0. EXPECT_TRUE(safe_strtod("1e310", &result)); EXPECT_EQ(std::numeric_limits::infinity(), result); @@ -296,6 +348,41 @@ TEST(safe_strtod, Double) { EXPECT_TRUE(safe_strtod("1e-325", &result)); EXPECT_EQ(0, result); + + EXPECT_TRUE(safe_strtod(" -0x1c", &result)); + EXPECT_EQ(-28.0, result); + + EXPECT_TRUE(safe_strtod("50 \t", &result)); + EXPECT_EQ(50.0, result); + + EXPECT_TRUE(safe_strtod("\t82.0\t ", &result)); + EXPECT_EQ(82.0, result); + + EXPECT_FALSE(safe_strtod("infinity", &result)); + + EXPECT_TRUE(safe_strtod("-inf", &result)); + EXPECT_EQ(-std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtod("+inf", &result)); + EXPECT_EQ(std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtod("InF", &result)); + EXPECT_EQ(std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtod("-INF", &result)); + EXPECT_EQ(-std::numeric_limits::infinity(), result); + + EXPECT_TRUE(safe_strtod("nan", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtod("-nan", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtod("-NaN", &result)); + EXPECT_TRUE(std::isnan(result)); + + EXPECT_TRUE(safe_strtod("+NAN", &result)); + EXPECT_TRUE(std::isnan(result)); } } // namespace strings diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc index 2c9e98357a..4598b8ccc7 
100644 --- a/tensorflow/core/lib/strings/str_util.cc +++ b/tensorflow/core/lib/strings/str_util.cc @@ -454,6 +454,14 @@ bool SplitAndParseAsFloats(StringPiece text, char delim, result); } +size_t Strnlen(const char* str, const size_t string_max_len) { + size_t len = 0; + while (len < string_max_len && str[len] != '\0') { + ++len; + } + return len; +} + bool StrContains(StringPiece haystack, StringPiece needle) { return std::search(haystack.begin(), haystack.end(), needle.begin(), needle.end()) != haystack.end(); diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h index 065871c1b4..e97d00b975 100644 --- a/tensorflow/core/lib/strings/str_util.h +++ b/tensorflow/core/lib/strings/str_util.h @@ -223,6 +223,11 @@ std::vector Split(StringPiece text, char delims, Predicate p) { return Split(text, StringPiece(&delims, 1), p); } +// Returns the length of the given null-terminated byte string 'str'. +// Returns 'string_max_len' if the null character was not found in the first +// 'string_max_len' bytes of 'str'. 
+size_t Strnlen(const char* str, const size_t string_max_len); + } // namespace str_util } // namespace tensorflow diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc index 63643c3e8e..3bf3e99825 100644 --- a/tensorflow/core/lib/strings/str_util_test.cc +++ b/tensorflow/core/lib/strings/str_util_test.cc @@ -430,56 +430,12 @@ TEST(StringReplace, EmptyStringReplaceAll) { EXPECT_EQ("", str_util::StringReplace("", "a", "X", /*replace_all=*/true)); } -TEST(StartsWith, Basic) { - const string s1( - "123" - "\0" - "456", - 7); - const StringPiece a("foobar"); - const StringPiece b(s1); - const StringPiece e; - EXPECT_TRUE(str_util::StartsWith(a, a)); - EXPECT_TRUE(str_util::StartsWith(a, "foo")); - EXPECT_TRUE(str_util::StartsWith(a, e)); - EXPECT_TRUE(str_util::StartsWith(b, s1)); - EXPECT_TRUE(str_util::StartsWith(b, b)); - EXPECT_TRUE(str_util::StartsWith(b, e)); - EXPECT_TRUE(str_util::StartsWith(e, "")); - EXPECT_FALSE(str_util::StartsWith(a, b)); - EXPECT_FALSE(str_util::StartsWith(b, a)); - EXPECT_FALSE(str_util::StartsWith(e, a)); -} - -TEST(EndsWith, Basic) { - const string s1( - "123" - "\0" - "456", - 7); - const StringPiece a("foobar"); - const StringPiece b(s1); - const StringPiece e; - EXPECT_TRUE(str_util::EndsWith(a, a)); - EXPECT_TRUE(str_util::EndsWith(a, "bar")); - EXPECT_TRUE(str_util::EndsWith(a, e)); - EXPECT_TRUE(str_util::EndsWith(b, s1)); - EXPECT_TRUE(str_util::EndsWith(b, b)); - EXPECT_TRUE(str_util::EndsWith(b, e)); - EXPECT_TRUE(str_util::EndsWith(e, "")); - EXPECT_FALSE(str_util::EndsWith(a, b)); - EXPECT_FALSE(str_util::EndsWith(b, a)); - EXPECT_FALSE(str_util::EndsWith(e, a)); -} - -TEST(StrContains, Basic) { - StringPiece a("abcdefg"); - StringPiece b("abcd"); - StringPiece c("efg"); - StringPiece d("gh"); - EXPECT_TRUE(str_util::StrContains(a, b)); - EXPECT_TRUE(str_util::StrContains(a, c)); - EXPECT_TRUE(!str_util::StrContains(a, d)); +TEST(Strnlen, Basic) { + EXPECT_EQ(0, 
str_util::Strnlen("ab", 0)); + EXPECT_EQ(1, str_util::Strnlen("a", 1)); + EXPECT_EQ(2, str_util::Strnlen("abcd", 2)); + EXPECT_EQ(3, str_util::Strnlen("abc", 10)); + EXPECT_EQ(4, str_util::Strnlen("a \t\n", 10)); } } // namespace tensorflow diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 0ede8c6370..569b6678ca 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -118,6 +118,7 @@ genrule( "@com_googlesource_code_re2//:LICENSE", "@cub_archive//:LICENSE.TXT", "@curl//:COPYING", + "@double_conversion//:LICENSE", "@eigen_archive//:COPYING.MPL2", "@farmhash_archive//:COPYING", "@fft2d//:fft/readme.txt", @@ -155,6 +156,7 @@ genrule( "@com_googlesource_code_re2//:LICENSE", "@cub_archive//:LICENSE.TXT", "@curl//:COPYING", + "@double_conversion//:LICENSE", "@eigen_archive//:COPYING.MPL2", "@farmhash_archive//:COPYING", "@fft2d//:fft/readme.txt", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 0ac5a5bb6d..7b508f87ab 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -128,6 +128,7 @@ filegroup( "@com_googlesource_code_re2//:LICENSE", "@cub_archive//:LICENSE.TXT", "@curl//:COPYING", + "@double_conversion//:LICENSE", "@eigen_archive//:COPYING.MPL2", "@farmhash_archive//:COPYING", "@fft2d//:fft/readme.txt", diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index aeaf8d7a24..bbef4b9e5f 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -693,6 +693,16 @@ def tf_workspace(path_prefix="", tf_repo_name=""): build_file = clean_dep("//third_party/flatbuffers:flatbuffers.BUILD"), ) + native.new_http_archive( + name = "double_conversion", + urls = [ + "https://github.com/google/double-conversion/archive/3992066a95b823efc8ccc1baf82a1cfc73f6e9b8.zip", + ], + sha256 = "2f7fbffac0d98d201ad0586f686034371a6d152ca67508ab611adc2386ad30de", + strip_prefix = 
"double-conversion-3992066a95b823efc8ccc1baf82a1cfc73f6e9b8", + build_file = clean_dep("//third_party:double_conversion.BUILD") + ) + tf_http_archive( name = "tflite_mobilenet", sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b", diff --git a/third_party/double_conversion.BUILD b/third_party/double_conversion.BUILD new file mode 100644 index 0000000000..9f905216c0 --- /dev/null +++ b/third_party/double_conversion.BUILD @@ -0,0 +1,38 @@ +# Bazel(http://bazel.io) BUILD file + +licenses(["notice"]) + +exports_files(["LICENSE"]) + +cc_library( + name = "double-conversion", + srcs = [ + "double-conversion/bignum.cc", + "double-conversion/bignum-dtoa.cc", + "double-conversion/cached-powers.cc", + "double-conversion/diy-fp.cc", + "double-conversion/double-conversion.cc", + "double-conversion/fast-dtoa.cc", + "double-conversion/fixed-dtoa.cc", + "double-conversion/strtod.cc", + "double-conversion/utils.h", + ], + hdrs = [ + "double-conversion/bignum.h", + "double-conversion/bignum-dtoa.h", + "double-conversion/cached-powers.h", + "double-conversion/diy-fp.h", + "double-conversion/double-conversion.h", + "double-conversion/fast-dtoa.h", + "double-conversion/fixed-dtoa.h", + "double-conversion/ieee.h", + "double-conversion/strtod.h", + ], + includes = [ + ".", + ], + linkopts = [ + "-lm", + ], + visibility = ["//visibility:public"], +) -- GitLab From 5fbb1feecd77a70b32d333b56bd13b1798b9a766 Mon Sep 17 00:00:00 2001 From: James Qin Date: Fri, 20 Apr 2018 11:23:29 -0700 Subject: [PATCH 081/434] Temporarily set cudnn Rnn math precision to fp32. Problem: When calling cudnnGetRNNLinLayerMatrixParams(), return error CUDNN_STATUS_BAD_PARAM if: * RNN descriptor set math precision = CUDNN_DATA_FLOAT * input descriptor dataType = CUDNN_DATA_HALF * weight descriptor dataType= CUDNN_DATA_HALF If updating Rnn descriptor math precision to CUDNN_DATA_HALF, then no error. cudnn 7.1.4 will fix the problem. 
PiperOrigin-RevId: 193696566 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index d673e19007..640f270323 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -2529,12 +2529,20 @@ cudnnDataType_t GetConvComputeType() { } // A helper struct to decide whether to use FP32 as the internal compute type -// for rnn when the input data type is FP16. By default it is turned on, -// users can explicitly disable them (choose to use FP16 as the internal compute -// type) through an env-var "TF_FP16_RNN_USE_FP32_COMPUTE=0". +// for rnn when the input data type is FP16. At present it is turned off, +// users can explicitly control it through an env-var +// TF_FP16_RNN_USE_FP32_COMPUTE. +// After the TODO below is fixed, users should almost always use fp32 compute +// type for training. Using fp16 might suffer suboptimal accuracy due to loss +// in precision. struct RnnDoFP32ComputationFP16Input { static constexpr const char* kName = "TF_FP16_RNN_USE_FP32_COMPUTE"; - static constexpr bool kDefaultFlag = true; + // TODO(jamesqin): b/78182362 flip to true when cudnn 7.1.4 fixes the bug. + // Before cudnn 7.1.4 RNN are always done in fp32, no matter what math + // precision is set. + // Set it temporarily to false so that no error is raised when using fp16 inputs, + // fp32 math precision. + static constexpr bool kDefaultFlag = false; }; // A helper function to return the internal compute type for -- GitLab From 712bbc5d7babd523951445f361f0e339061cd259 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 20 Apr 2018 11:24:53 -0700 Subject: [PATCH 082/434] Allow creating tensors from numpy arrays, and other various constants - try #2 Allow type-inference from a different input tensor, similar to args_to_matching_eager.
- Update TFE_Py_TensorShapeSlice to take tuples. - Update int values to allow int/long in py2 END_PUBLIC BEGIN_PUBLIC Automated g4 rollback of changelist 192184809 PiperOrigin-RevId: 193696790 --- tensorflow/python/eager/pywrap_tensor.cc | 201 ++++++++-------- tensorflow/python/eager/pywrap_tensor.h | 10 + tensorflow/python/eager/pywrap_tfe.h | 12 +- tensorflow/python/eager/pywrap_tfe_src.cc | 278 +++++++++++++++++++--- tensorflow/python/eager/tensor_test.py | 7 +- tensorflow/python/framework/ops.py | 16 ++ 6 files changed, 389 insertions(+), 135 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 519814b979..b5b4e394e3 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -60,42 +60,6 @@ TFE_TensorHandle* NumpyToTensorHandle(PyObject* obj) { } } -// Casts data referred to by `handle` from type `src_type_enum` to type -// `dst_type_enum`. -TFE_TensorHandle* EagerCast(TFE_Context* ctx, TFE_TensorHandle* handle, - TF_DataType src_type_enum, - TF_DataType dst_type_enum, TF_Status* out_status) { - if (ctx == nullptr) return nullptr; - const char* op_name = "Cast"; - const char* device_name = "/job:localhost/replica:0/task:0/device:CPU:0"; - TFE_Op* op = TFE_NewOp(ctx, op_name, out_status); -#define RETURN_ERROR \ - { \ - TFE_DeleteOp(op); \ - return nullptr; \ - } - if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR - TFE_OpSetDevice(op, device_name, out_status); - if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR - TFE_OpAddInput(op, handle, out_status); - if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR - TFE_OpSetAttrType(op, "SrcT", src_type_enum); - TFE_OpSetAttrType(op, "DstT", dst_type_enum); - TFE_TensorHandle* output = nullptr; - int num_outputs = 1; - TFE_Execute(op, &output, &num_outputs, out_status); - if (TF_GetCode(out_status) != TF_OK || num_outputs != 1 || - output == nullptr) { - if (output != nullptr) { - TFE_DeleteTensorHandle(output); - 
} - RETURN_ERROR - } - TFE_DeleteOp(op); - return output; -#undef RETURN_ERROR -} - TFE_TensorHandle* CopyToDevice(TFE_TensorHandle* handle, PyObject* ctx, PyObject* dev) { const char* device = ""; @@ -161,6 +125,100 @@ PyObject* PyIntFromDataType(TF_DataType l) { } // namespace +namespace tensorflow { +// Casts data referred to by `handle` from type `src_type_enum` to type +// `dst_type_enum`. +TFE_TensorHandle* EagerCast(TFE_Context* ctx, TFE_TensorHandle* handle, + TF_DataType src_type_enum, + TF_DataType dst_type_enum, TF_Status* out_status) { + if (ctx == nullptr) return nullptr; + const char* op_name = "Cast"; + const char* device_name = "/job:localhost/replica:0/task:0/device:CPU:0"; + TFE_Op* op = TFE_NewOp(ctx, op_name, out_status); +#define RETURN_ERROR \ + { \ + TFE_DeleteOp(op); \ + return nullptr; \ + } + if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR + TFE_OpSetDevice(op, device_name, out_status); + if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR + TFE_OpAddInput(op, handle, out_status); + if (TF_GetCode(out_status) != TF_OK) RETURN_ERROR + TFE_OpSetAttrType(op, "SrcT", src_type_enum); + TFE_OpSetAttrType(op, "DstT", dst_type_enum); + TFE_TensorHandle* output = nullptr; + int num_outputs = 1; + TFE_Execute(op, &output, &num_outputs, out_status); + if (TF_GetCode(out_status) != TF_OK || num_outputs != 1 || + output == nullptr) { + if (output != nullptr) { + TFE_DeleteTensorHandle(output); + } + RETURN_ERROR + } + TFE_DeleteOp(op); + return output; +#undef RETURN_ERROR +} + +TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype) { + int desired_dtype = -1; + if (dtype != Py_None) { + if (!PyIntToDataType(dtype, &desired_dtype)) { + PyErr_SetString(PyExc_TypeError, + tensorflow::strings::StrCat( + "Expecting a DataType value for dtype. 
Got ", + Py_TYPE(dtype)->tp_name) + .c_str()); + return nullptr; + } + } + if (PyArray_Check(value)) { + int desired_np_dtype = -1; + if (desired_dtype >= 0) { + if (!tensorflow::TF_DataType_to_PyArray_TYPE( + static_cast(desired_dtype), &desired_np_dtype) + .ok()) { + PyErr_SetString(PyExc_TypeError, + tensorflow::strings::StrCat( + "Invalid dtype argument value ", desired_dtype) + .c_str()); + return nullptr; + } + } + PyArrayObject* array = reinterpret_cast(value); + int current_np_dtype = PyArray_TYPE(array); + auto safe_value = tensorflow::make_safe(static_cast(nullptr)); + if ((desired_np_dtype >= 0 && desired_np_dtype != current_np_dtype) || + !PyArray_ISCARRAY(array)) { + int new_dtype = + desired_np_dtype >= 0 ? desired_np_dtype : current_np_dtype; + safe_value = tensorflow::make_safe( + PyArray_FromAny(value, PyArray_DescrFromType(new_dtype), 0, 0, + NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, nullptr)); + if (PyErr_Occurred()) return nullptr; + if (safe_value == nullptr) { + PyErr_SetString(PyExc_ValueError, "Error while casting a numpy value"); + return nullptr; + } + value = safe_value.get(); + } + return NumpyToTensorHandle(value); + } else { + tensorflow::Tensor t; + // TODO(josh11b): Have PySeqToTensor set python errors instead of + // returning Status. 
+ auto cppstatus = tensorflow::PySeqToTensor(value, dtype, &t); + if (!cppstatus.ok()) { + PyErr_SetString(PyExc_ValueError, cppstatus.error_message().c_str()); + return nullptr; + } + return TFE_NewTensorHandle(t); + } +} +} // namespace tensorflow + extern "C" { static const int kMaxEagerTensorParentSize = 64; @@ -230,61 +288,16 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { return -1; } } - tensorflow::Safe_TFE_TensorHandlePtr handle = - tensorflow::make_safe(static_cast(nullptr)); PyErr_Clear(); - if (PyArray_Check(value)) { - int desired_np_dtype = -1; - if (desired_dtype >= 0) { - if (!tensorflow::TF_DataType_to_PyArray_TYPE( - static_cast(desired_dtype), &desired_np_dtype) - .ok()) { - PyErr_SetString(PyExc_TypeError, - tensorflow::strings::StrCat( - "Invalid dtype argument value ", desired_dtype) - .c_str()); - return -1; - } - } - PyArrayObject* array = reinterpret_cast(value); - int current_np_dtype = PyArray_TYPE(array); - auto safe_value = tensorflow::make_safe(static_cast(nullptr)); - if ((desired_np_dtype >= 0 && desired_np_dtype != current_np_dtype) || - !PyArray_ISCARRAY(array)) { - int new_dtype = - desired_np_dtype >= 0 ? desired_np_dtype : current_np_dtype; - safe_value = tensorflow::make_safe( - PyArray_FromAny(value, PyArray_DescrFromType(new_dtype), 0, 0, - NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, nullptr)); - if (PyErr_Occurred()) return -1; - if (safe_value == nullptr) { - PyErr_SetString(PyExc_ValueError, "Error while casting a numpy value"); - return -1; - } - value = safe_value.get(); - } - handle = tensorflow::make_safe(NumpyToTensorHandle(value)); - } else { - tensorflow::Tensor t; - // TODO(josh11b): Have PySeqToTensor set python errors instead of - // returning Status. 
- auto cppstatus = tensorflow::PySeqToTensor(value, dtype, &t); - if (!cppstatus.ok()) { - PyErr_SetString(PyExc_ValueError, cppstatus.error_message().c_str()); - return -1; - } - handle = tensorflow::make_safe(TFE_NewTensorHandle(t)); - } - if (PyErr_Occurred()) return -1; - if (handle == nullptr) { - PyErr_SetString(PyExc_ValueError, "Error while creating an EagerTensor"); - return -1; - } + tensorflow::Safe_TFE_TensorHandlePtr handle = + tensorflow::make_safe(static_cast( + tensorflow::ConvertToEagerTensor(value, dtype))); + if (handle == nullptr) return -1; TF_DataType handle_dtype = TFE_TensorHandleDataType(handle.get()); if (desired_dtype >= 0 && desired_dtype != handle_dtype) { - handle = tensorflow::make_safe( - EagerCast(GetContext(context), handle.get(), handle_dtype, - static_cast(desired_dtype), self->status)); + handle = tensorflow::make_safe(tensorflow::EagerCast( + GetContext(context), handle.get(), handle_dtype, + static_cast(desired_dtype), self->status)); if (TF_GetCode(self->status) != TF_OK) { PyErr_SetString(PyExc_ValueError, tensorflow::strings::StrCat( @@ -701,12 +714,12 @@ PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) { return reinterpret_cast(EagerTensorType); } -PyObject* TFE_Py_TensorShapeSlice(PyObject* tensor_list, int slice_dim) { - if (!PyList_Check(tensor_list)) { +PyObject* TFE_Py_TensorShapeSlice(PyObject* tensors, int slice_dim) { + if (!PyList_Check(tensors) && !PyTuple_Check(tensors)) { PyErr_SetString(PyExc_TypeError, tensorflow::strings::StrCat( - "tensor_list argument must be a list. Got \"", - Py_TYPE(tensor_list)->tp_name, "\"") + "tensors argument must be a list or a tuple. 
Got \"", + Py_TYPE(tensors)->tp_name, "\"") .c_str()); return nullptr; } @@ -720,14 +733,14 @@ PyObject* TFE_Py_TensorShapeSlice(PyObject* tensor_list, int slice_dim) { return nullptr; } - Py_ssize_t num_tensors = PyList_Size(tensor_list); + Py_ssize_t num_tensors = PySequence_Fast_GET_SIZE(tensors); int64_t num_tensors_int = static_cast(num_tensors); auto tensor = tensorflow::make_safe(TF_AllocateTensor( TF_INT32, &num_tensors_int, /*num_dims=*/1, /*len=*/4 * num_tensors_int)); int32_t* data = reinterpret_cast(TF_TensorData(tensor.get())); auto status = tensorflow::make_safe(TF_NewStatus()); for (Py_ssize_t i = 0; i < num_tensors; ++i) { - PyObject* tensor_obj = PyList_GET_ITEM(tensor_list, i); + PyObject* tensor_obj = PySequence_Fast_GET_ITEM(tensors, i); if (!EagerTensor_CheckExact(tensor_obj)) { PyErr_SetString(PyExc_TypeError, tensorflow::strings::StrCat( diff --git a/tensorflow/python/eager/pywrap_tensor.h b/tensorflow/python/eager/pywrap_tensor.h index aa1efdd1b8..63ab1ed84d 100644 --- a/tensorflow/python/eager/pywrap_tensor.h +++ b/tensorflow/python/eager/pywrap_tensor.h @@ -22,4 +22,14 @@ limitations under the License. bool EagerTensor_CheckExact(const PyObject* o); tensorflow::int64 EagerTensor_id(const PyObject* tensor); +namespace tensorflow { +TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype); + +// TODO(nareshmodi): Move EagerCast and ReadVariableOp (which use the C API to +// execute TFE Ops) to a separate common library. 
+TFE_TensorHandle* EagerCast(TFE_Context* ctx, TFE_TensorHandle* handle, + TF_DataType src_type_enum, + TF_DataType dst_type_enum, TF_Status* out_status); +} + #endif // TENSORFLOW_PYTHON_EAGER_PYWRAP_TENSOR_H_ diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index 32d731d0f6..691b613e48 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -186,16 +186,16 @@ PyObject* TFE_Py_RecordGradient(PyObject* op_name, PyObject* inputs, // Returns the set of variables watched by the given tape. PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape); -// Returns an EagerTensor of dimension [len(`tensor_list`)] containing -// the `slice_dim`'th dimension of each tensor in `tensor_list`. In other words, +// Returns an EagerTensor of dimension [len(`tensors`)] containing +// the `slice_dim`'th dimension of each tensor in `tensors`. In other words, // TFE_Py_TensorShapeSlice takes a slice of dimensions of tensors in -// `tensor_list`. For example, if `tensor_list` contains tensors of with shapes +// `tensors`. For example, if `tensors` contains tensors with shapes // [1, 2, 3], [4, 5], [6, 7, 8, 9], TFE_Py_TensorShapeSlice called with // `slice_dim` equal to 1 will return [2, 5, 7]. // On error, returns nullptr and sets python exception. -// REQUIRES: `tensor_list` is a python list of EagerTensors +// REQUIRES: `tensors` is a python list/tuple of EagerTensors // REQUIRES: `slice_dim` is non-negative and smaller than the rank of all -// tensors in `tensor_list`. -PyObject* TFE_Py_TensorShapeSlice(PyObject* tensor_list, int slice_dim); +// tensors in `tensors`.
+PyObject* TFE_Py_TensorShapeSlice(PyObject* tensors, int slice_dim); #endif // TENSORFLOW_PYTHON_EAGER_PYWRAP_TFE_H_ diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index d99bd0b0ff..2bfa1f052c 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -38,6 +38,54 @@ using tensorflow::strings::Printf; namespace { +struct InputInfo { + InputInfo(int i, bool is_list) : i(i), is_list(is_list) {} + + int i; + bool is_list = false; +}; + +using AttrToInputsMap = + tensorflow::gtl::FlatMap>; + +tensorflow::mutex all_attr_to_input_maps_lock( + tensorflow::LINKER_INITIALIZED); +tensorflow::gtl::FlatMap* GetAllAttrToInputsMaps() { + static auto* all_attr_to_input_maps = + new tensorflow::gtl::FlatMap; + return all_attr_to_input_maps; +} + +AttrToInputsMap* GetAttrToInputsMap(const tensorflow::OpDef& op_def) { + tensorflow::mutex_lock l(all_attr_to_input_maps_lock); + auto* all_attr_to_input_maps = GetAllAttrToInputsMaps(); + + auto* output = + tensorflow::gtl::FindPtrOrNull(*all_attr_to_input_maps, op_def.name()); + if (output != nullptr) { + return output; + } + + std::unique_ptr m(new AttrToInputsMap); + + // Store a list of InputIndex -> List of corresponding inputs. + for (int i = 0; i < op_def.input_arg_size(); i++) { + if (!op_def.input_arg(i).type_attr().empty()) { + auto it = m->find(op_def.input_arg(i).type_attr()); + if (it == m->end()) { + it = m->insert({op_def.input_arg(i).type_attr(), {}}).first; + } + it->second.emplace_back(i, !op_def.input_arg(i).number_attr().empty()); + } + } + + auto* retval = m.get(); + (*all_attr_to_input_maps)[op_def.name()] = m.release(); + + return retval; +} + struct FastPathOpExecInfo { TFE_Context* ctx; const char* device_name; @@ -53,6 +101,14 @@ struct FastPathOpExecInfo { // The op type name of the main op being executed. PyObject* op_name; PyObject* callbacks; + + // All the args passed into the FastPathOpExecInfo. 
+ PyObject* args; + + // DTypes can come from another input that has the same attr. So build that + // map. + const AttrToInputsMap* attr_to_inputs_map; + tensorflow::gtl::FlatMap cached_dtypes; }; #define PARSE_VALUE(fn_name, type, check_fn, parse_fn) \ @@ -76,12 +132,29 @@ PARSE_VALUE(ParseIntValue, int, PyLong_Check, PyLong_AsLong) PARSE_VALUE(ParseInt64Value, int64_t, PyLong_Check, PyLong_AsLong) #else PARSE_VALUE(ParseIntValue, int, PyInt_Check, PyInt_AsLong) -PARSE_VALUE(ParseInt64Value, int64_t, PyInt_Check, PyInt_AsLong) -PARSE_VALUE(ParseInt64LongValue, int64_t, PyLong_Check, PyLong_AsLong) #endif PARSE_VALUE(ParseFloatValue, float, PyFloat_Check, PyFloat_AsDouble) #undef PARSE_VALUE +#if PY_MAJOR_VERSION < 3 +bool ParseInt64Value(const string& key, PyObject* py_value, TF_Status* status, + int64_t* value) { + if (PyInt_Check(py_value)) { + *value = static_cast(PyInt_AsLong(py_value)); + return true; + } else if (PyLong_Check(py_value)) { + *value = static_cast(PyLong_AsLong(py_value)); + return true; + } + TF_SetStatus( + status, TF_INVALID_ARGUMENT, + tensorflow::strings::StrCat("Expecting int or long value for attr ", key, + ", got ", py_value->ob_type->tp_name) + .c_str()); + return false; +} +#endif + Py_ssize_t TensorShapeNumDims(PyObject* value) { const auto size = PySequence_Size(value); if (size == -1) { @@ -234,7 +307,7 @@ bool SetOpAttrList( std::unique_ptr buffer(new int64_t[total_dims]); // Copy the input dims into the buffer and set dims to point to // the start of each list's dims. 
- std::unique_ptr dims(new const int64_t*[num_values]); + std::unique_ptr dims(new const int64_t*[num_values]); std::unique_ptr num_dims(new int[num_values]); int64_t* offset = buffer.get(); for (int i = 0; i < num_values; ++i) { @@ -296,7 +369,7 @@ void SetOpAttrListDefault( TF_Status* status) { if (type == TF_ATTR_STRING) { int num_values = attr.default_value().list().s_size(); - std::unique_ptr values(new const char*[num_values]); + std::unique_ptr values(new const char*[num_values]); (*attr_list_sizes)[key] = num_values; for (int i = 0; i < num_values; i++) { values[i] = attr.default_value().list().s(i).data(); @@ -349,7 +422,7 @@ void SetOpAttrListDefault( std::unique_ptr buffer(new int64_t[total_dims]); // Copy the input dims into the buffer and set dims to point to // the start of each list's dims. - std::unique_ptr dims(new const int64_t*[num_values]); + std::unique_ptr dims(new const int64_t*[num_values]); std::unique_ptr num_dims(new int[num_values]); int64_t* offset = buffer.get(); for (int i = 0; i < num_values; ++i) { @@ -369,7 +442,7 @@ void SetOpAttrListDefault( } else if (type == TF_ATTR_FUNC) { int num_values = attr.default_value().list().func_size(); (*attr_list_sizes)[key] = num_values; - std::unique_ptr funcs(new const TFE_Op*[num_values]); + std::unique_ptr funcs(new const TFE_Op*[num_values]); for (int i = 0; i < num_values; i++) { funcs[i] = GetFunc(ctx, attr.default_value().list().func(i), status); } @@ -1399,10 +1472,39 @@ PyObject* GetPythonObjectFromString(const char* s) { #endif } +PyObject* GetPythonObjectFromInt(int num) { +#if PY_MAJOR_VERSION >= 3 + return PyLong_FromLong(num); +#else + return PyInt_FromLong(num); +#endif +} + bool CheckResourceVariable(PyObject* item) { return PyObject_TypeCheck(item, resource_variable_type); } +bool IsNumberType(PyObject* item) { +#if PY_MAJOR_VERSION >= 3 + return PyFloat_Check(item) || PyLong_Check(item); +#else + return PyFloat_Check(item) || PyInt_Check(item) || PyLong_Check(item); +#endif +} + 
+bool CheckOneInput(PyObject* item) { + if (EagerTensor_CheckExact(item) || CheckResourceVariable(item) || + PyArray_Check(item) || IsNumberType(item)) { + return true; + } + + // Sequences are not properly handled. Sequences with purely python numeric + // types work, but sequences with mixes of EagerTensors and python numeric + // types don't work. + // TODO(nareshmodi): fix + return false; +} + bool CheckInputsOk(PyObject* seq, int start_index, const tensorflow::OpDef& op_def) { for (int i = 0; i < op_def.input_arg_size(); i++) { @@ -1419,8 +1521,7 @@ bool CheckInputsOk(PyObject* seq, int start_index, } for (Py_ssize_t j = 0; j < PySequence_Fast_GET_SIZE(item); j++) { PyObject* inner_item = PySequence_Fast_GET_ITEM(item, j); - if (!EagerTensor_CheckExact(inner_item) && - !CheckResourceVariable(inner_item)) { + if (!CheckOneInput(inner_item)) { VLOG(1) << "Falling back to slow path for Op \"" << op_def.name() << "\", Input \"" << op_def.input_arg(i).name() << "\", Index " @@ -1430,7 +1531,7 @@ bool CheckInputsOk(PyObject* seq, int start_index, return false; } } - } else if (!EagerTensor_CheckExact(item) && !CheckResourceVariable(item)) { + } else if (!CheckOneInput(item)) { VLOG(1) << "Falling back to slow path for Op \"" << op_def.name() << "\", Input \"" << op_def.input_arg(i).name() @@ -1443,6 +1544,52 @@ bool CheckInputsOk(PyObject* seq, int start_index, return true; } +PyObject* MaybeGetDType(PyObject* item) { + if (EagerTensor_CheckExact(item)) { + tensorflow::Safe_PyObjectPtr py_dtype( + PyObject_GetAttrString(item, "dtype")); + return PyObject_GetAttrString(py_dtype.get(), "_type_enum"); + } + + if (CheckResourceVariable(item)) { + tensorflow::Safe_PyObjectPtr py_dtype( + PyObject_GetAttrString(item, "_dtype")); + return PyObject_GetAttrString(py_dtype.get(), "_type_enum"); + } + + return nullptr; +} + +PyObject* MaybeGetDTypeForAttr(const string& attr, + FastPathOpExecInfo* op_exec_info) { + auto cached_it = op_exec_info->cached_dtypes.find(attr); + if 
(cached_it != op_exec_info->cached_dtypes.end()) { + return GetPythonObjectFromInt(cached_it->second); + } + + auto it = op_exec_info->attr_to_inputs_map->find(attr); + if (it == op_exec_info->attr_to_inputs_map->end()) { + // No other inputs - this should never happen. + Py_RETURN_NONE; + } + + for (const auto& input_info : it->second) { + PyObject* item = PyTuple_GET_ITEM( + op_exec_info->args, kFastPathExecuteInputStartIndex + input_info.i); + if (input_info.is_list) { + for (int i = 0; i < PySequence_Fast_GET_SIZE(item); i++) { + auto* dtype = MaybeGetDType(PySequence_Fast_GET_ITEM(item, i)); + if (dtype != nullptr) return dtype; + } + } else { + auto* dtype = MaybeGetDType(item); + if (dtype != nullptr) return dtype; + } + } + + Py_RETURN_NONE; +} + bool OpDoesntRequireOutput(const string& op_name) { static tensorflow::gtl::FlatSet* ops_that_dont_require_outputs = new tensorflow::gtl::FlatSet({ @@ -1668,23 +1815,80 @@ bool ReadVariableOp(const FastPathOpExecInfo& parent_op_exec_info, // i) input is an EagerTensor // ii) input is a ResourceVariable - in this case, the is_variable param is set // to true. -bool ConvertToTensor(const FastPathOpExecInfo& op_exec_info, PyObject* input, - tensorflow::Safe_PyObjectPtr* output_handle, - TF_Status* status) { - if (CheckResourceVariable(input)) { +// +// NOTE: dtype_hint_getter must *always* return a PyObject that can be +// decref'd. So if no hint is found, Py_RETURN_NONE (which correctly +// increfs Py_None). +bool ConvertToTensor( + const FastPathOpExecInfo& op_exec_info, PyObject* input, + tensorflow::Safe_PyObjectPtr* output_handle, + // This gets a hint for this particular input. + const std::function& dtype_hint_getter, + // This sets the dtype after conversion is complete. 
+ const std::function& dtype_setter, + TF_Status* status) { + if (EagerTensor_CheckExact(input)) { + Py_INCREF(input); + output_handle->reset(input); + return true; + } else if (CheckResourceVariable(input)) { return ReadVariableOp(op_exec_info, input, output_handle, status); } - Py_INCREF(input); - output_handle->reset(input); + // The hint comes from a supposedly similarly typed tensor. + tensorflow::Safe_PyObjectPtr dtype_hint(dtype_hint_getter()); + if (PyErr_Occurred()) { + return false; + } + + tensorflow::Safe_TFE_TensorHandlePtr handle = + tensorflow::make_safe(static_cast( + tensorflow::ConvertToEagerTensor(input, dtype_hint.get()))); + if (handle == nullptr) { + status->status = tensorflow::errors::InvalidArgument( + "Unable to convert value to tensor"); + return false; + } + + int desired_dtype = -1; + if (dtype_hint.get() != Py_None) { + if (!ParseTypeValue("", dtype_hint.get(), status, &desired_dtype)) { + status->status = tensorflow::errors::InvalidArgument( + "Expecting a DataType value for dtype. Got ", + Py_TYPE(dtype_hint.get())->tp_name); + } + } + + TF_DataType handle_dtype = TFE_TensorHandleDataType(handle.get()); + if (desired_dtype >= 0 && desired_dtype != handle_dtype) { + handle = tensorflow::make_safe( + tensorflow::EagerCast(op_exec_info.ctx, handle.get(), handle_dtype, + static_cast(desired_dtype), status)); + if (!status->status.ok()) return false; + + handle_dtype = TFE_TensorHandleDataType(handle.get()); + } + + if (handle_dtype != TF_INT32) { + // Note that this is a shallow copy and will share the underlying buffer + // if copying to the same device. + handle = tensorflow::make_safe(TFE_TensorHandleCopyToDevice( + handle.get(), op_exec_info.ctx, op_exec_info.device_name, status)); + if (!status->status.ok()) return false; + } + + output_handle->reset(EagerTensorFromHandle(handle.release())); + + dtype_setter(handle_dtype); return true; } // Adds input and type attr to the op, and to the list of flattened // inputs/attrs. 
-bool AddInputToOp(const FastPathOpExecInfo& op_exec_info, PyObject* input, - const tensorflow::OpDef::ArgDef* input_arg, +bool AddInputToOp(FastPathOpExecInfo* op_exec_info, PyObject* input, + const bool add_type_attr, + const tensorflow::OpDef::ArgDef& input_arg, std::vector* flattened_attrs, std::vector* flattened_inputs, TFE_Op* op, TF_Status* status) { @@ -1693,18 +1897,30 @@ bool AddInputToOp(const FastPathOpExecInfo& op_exec_info, PyObject* input, // out of scope in this function. tensorflow::Safe_PyObjectPtr py_eager_tensor = nullptr; - if (!ConvertToTensor(op_exec_info, input, &py_eager_tensor, status)) { + if (!ConvertToTensor( + *op_exec_info, input, &py_eager_tensor, + [&]() { + if (input_arg.type() != tensorflow::DataType::DT_INVALID) { + return GetPythonObjectFromInt(input_arg.type()); + } + return MaybeGetDTypeForAttr(input_arg.type_attr(), op_exec_info); + }, + [&](const TF_DataType dtype) { + op_exec_info->cached_dtypes[input_arg.type_attr()] = + static_cast(dtype); + }, + status)) { return false; } TFE_TensorHandle* input_handle = EagerTensor_Handle(py_eager_tensor.get()); - if (input_arg != nullptr && !input_arg->type_attr().empty()) { + if (add_type_attr && !input_arg.type_attr().empty()) { auto dtype = TFE_TensorHandleDataType(input_handle); - TFE_OpSetAttrType(op, input_arg->type_attr().data(), dtype); + TFE_OpSetAttrType(op, input_arg.type_attr().data(), dtype); if (flattened_attrs != nullptr) { flattened_attrs->emplace_back( - GetPythonObjectFromString(input_arg->type_attr().data())); + GetPythonObjectFromString(input_arg.type_attr().data())); flattened_attrs->emplace_back(PyLong_FromLong(dtype)); } } @@ -1844,6 +2060,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { op_exec_info.ctx = reinterpret_cast( PyCapsule_GetPointer(PyTuple_GET_ITEM(args, 0), nullptr)); + op_exec_info.args = args; if (op_exec_info.ctx == nullptr) { // The context hasn't been initialized. It will be in the slow path. 
@@ -1892,6 +2109,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } + op_exec_info.attr_to_inputs_map = GetAttrToInputsMap(*op_def); + TF_Status* status = TF_NewStatus(); TFE_Op* op = TFE_NewOp(op_exec_info.ctx, op_def->name().c_str(), status); auto cleaner = tensorflow::gtl::MakeCleanup([status, op] { @@ -1986,17 +2205,16 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { if (len > 0) { // First item adds the type attr. - if (!AddInputToOp(op_exec_info, PySequence_Fast_GET_ITEM(input, 0), - &input_arg, flattened_attrs.get(), + if (!AddInputToOp(&op_exec_info, PySequence_Fast_GET_ITEM(input, 0), + true, input_arg, flattened_attrs.get(), flattened_inputs.get(), op, status)) { return nullptr; } for (Py_ssize_t j = 1; j < len; j++) { // Since the list is homogeneous, we don't need to re-add the attr. - if (!AddInputToOp(op_exec_info, PySequence_Fast_GET_ITEM(input, j), - nullptr /* input_arg */, - nullptr /* flattened_attrs */, + if (!AddInputToOp(&op_exec_info, PySequence_Fast_GET_ITEM(input, j), + false, input_arg, nullptr /* flattened_attrs */, flattened_inputs.get(), op, status)) { return nullptr; } @@ -2018,7 +2236,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { PyObject* py_input = PySequence_Fast_GET_ITEM(input, j); tensorflow::Safe_PyObjectPtr py_eager_tensor; if (!ConvertToTensor(op_exec_info, py_input, &py_eager_tensor, - status)) { + []() { Py_RETURN_NONE; }, + [](const TF_DataType& dtype) {}, status)) { return nullptr; } @@ -2048,8 +2267,9 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { attr_list_sizes[attr_name] = len; } else { // The item is a single item. 
- if (!AddInputToOp(op_exec_info, input, &input_arg, flattened_attrs.get(), - flattened_inputs.get(), op, status)) { + if (!AddInputToOp(&op_exec_info, input, true, input_arg, + flattened_attrs.get(), flattened_inputs.get(), op, + status)) { return nullptr; } } diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py index 0bd5a5dbaf..b044b30231 100644 --- a/tensorflow/python/eager/tensor_test.py +++ b/tensorflow/python/eager/tensor_test.py @@ -278,14 +278,9 @@ class TFETensorUtilTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp( TypeError, - r"tensor_list argument must be a list. Got \"EagerTensor\""): + r"tensors argument must be a list or a tuple. Got \"EagerTensor\""): pywrap_tensorflow.TFE_Py_TensorShapeSlice(t1, -2) - with self.assertRaisesRegexp( - TypeError, - r"tensor_list argument must be a list. Got \"tuple\""): - pywrap_tensorflow.TFE_Py_TensorShapeSlice((t1,), -2) - def testNegativeSliceDim(self): t1 = _create_tensor([1, 2], dtype=dtypes.int32) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 662cda2a7d..8cd6820f6a 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1385,6 +1385,22 @@ def register_tensor_conversion_function(base_type, if not callable(conversion_func): raise TypeError("conversion_func must be callable.") + # context._context is checked so that we don't inadvertently create it. + # This is because enable_eager_execution will fail when called from the main + # function if the context._context is already created, and the + # register_tensor_conversion_function calls happen when the module is + # imported. + if context._context is not None and context.executing_eagerly( + ) and isinstance(base_type, six.integer_types + ( + float, + np.ndarray, + )): + # TODO(nareshmodi): consider setting a context variable which disables the + # fastpath instead. 
+ raise TypeError( + "Cannot register conversions for numpy arrays, python number types " + "when executing eagerly.") + try: funcs_at_priority = _tensor_conversion_func_registry[priority] except KeyError: -- GitLab From 76ea66f24d4370e6e7848b83fc0b571ba7edfa2d Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 20 Apr 2018 11:34:55 -0700 Subject: [PATCH 083/434] Move the guts of TFE_Op into EagerOperation PiperOrigin-RevId: 193698320 --- tensorflow/c/eager/BUILD | 2 + tensorflow/c/eager/c_api.cc | 230 +++++++++--------- tensorflow/c/eager/c_api_internal.h | 16 +- tensorflow/core/common_runtime/eager/BUILD | 16 ++ .../common_runtime/eager/eager_operation.cc | 33 +++ .../common_runtime/eager/eager_operation.h | 74 ++++++ 6 files changed, 242 insertions(+), 129 deletions(-) create mode 100644 tensorflow/core/common_runtime/eager/eager_operation.cc create mode 100644 tensorflow/core/common_runtime/eager/eager_operation.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 3e14c10727..d66386acbd 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -51,6 +51,7 @@ tf_cuda_library( ], "//conditions:default": [], }) + [ + "//tensorflow/core/common_runtime/eager:eager_operation", "//tensorflow/core:gpu_runtime", ], ) @@ -73,6 +74,7 @@ tf_cuda_library( "//tensorflow/core:lib_internal", "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", + "//tensorflow/core/common_runtime/eager:eager_operation", "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core/common_runtime/eager:tensor_handle", ], diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 369342b142..b7a3097208 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -241,21 +241,18 @@ TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, void TFE_DeleteOp(TFE_Op* op) { delete op; } void TFE_OpSetDevice(TFE_Op* op, const char* device_name, 
TF_Status* status) { - tensorflow::Device* d = nullptr; - if (device_name != nullptr && strlen(device_name) > 0) { - status->status = op->ctx->context.FindDeviceByName(device_name, &d); - } - op->device = d; + status->status = op->operation.SetDevice(device_name); } const char* TFE_OpGetDevice(TFE_Op* op, TF_Status* status) { - tensorflow::Device* device = - (op->device == nullptr) ? op->ctx->context.HostCPU() : op->device; + tensorflow::Device* device = (op->operation.Device() == nullptr) + ? op->operation.EagerContext()->HostCPU() + : op->operation.Device(); return device->name().c_str(); } void TFE_OpSetXLACompilation(TFE_Op* op, unsigned char enable) { - op->use_xla = enable; + op->operation.SetUseXla(enable); #ifndef TENSORFLOW_EAGER_USE_XLA LOG(WARNING) << "This call is a no-op, as the TensorFlow library is not " "built with XLA support."; @@ -263,22 +260,20 @@ void TFE_OpSetXLACompilation(TFE_Op* op, unsigned char enable) { } void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { - h->handle->Ref(); - op->inputs.push_back(h->handle); - op->attrs.NumInputs(op->inputs.size()); + op->operation.AddInput(h->handle); } TF_AttrType TFE_OpGetAttrType(TFE_Op* op, const char* attr_name, unsigned char* is_list, TF_Status* status) { TF_AttrType ret; - if (op->is_function()) { + if (op->operation.is_function()) { status->status = tensorflow::errors::Unimplemented( "TODO(apassos): Support for attributes for TensorFlow functions is not " "ready yet."); return TF_ATTR_INT; // The compiler requires that we return something. 
} - status->status = - tensorflow::AttrTypeByName(*op->attr_types, attr_name, &ret, is_list); + status->status = tensorflow::AttrTypeByName(*op->operation.AttrTypes(), + attr_name, &ret, is_list); return ret; } @@ -297,23 +292,24 @@ TF_AttrType TFE_OpNameGetAttrType(TFE_Context* ctx, } void TFE_OpSetAttrString(TFE_Op* op, const char* attr_name, const char* value) { - op->attrs.Set(attr_name, value); + op->operation.MutableAttrs()->Set(attr_name, value); } void TFE_OpSetAttrInt(TFE_Op* op, const char* attr_name, int64_t value) { - op->attrs.Set(attr_name, static_cast(value)); + op->operation.MutableAttrs()->Set(attr_name, static_cast(value)); } void TFE_OpSetAttrFloat(TFE_Op* op, const char* attr_name, float value) { - op->attrs.Set(attr_name, value); + op->operation.MutableAttrs()->Set(attr_name, value); } void TFE_OpSetAttrBool(TFE_Op* op, const char* attr_name, unsigned char value) { - op->attrs.Set(attr_name, (value == 0) ? false : true); + op->operation.MutableAttrs()->Set(attr_name, (value == 0) ? 
false : true); } void TFE_OpSetAttrType(TFE_Op* op, const char* attr_name, TF_DataType value) { - op->attrs.Set(attr_name, static_cast(value)); + op->operation.MutableAttrs()->Set(attr_name, + static_cast(value)); } void TFE_OpSetAttrShape(TFE_Op* op, const char* attr_name, const int64_t* dims, @@ -335,23 +331,24 @@ void TFE_OpSetAttrShape(TFE_Op* op, const char* attr_name, const int64_t* dims, proto.add_dim()->set_size(dims[d]); } } - op->attrs.Set(attr_name, proto); + op->operation.MutableAttrs()->Set(attr_name, proto); } void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name, const TFE_Op* value) { tensorflow::AttrValue attr_value; tensorflow::NameAttrList* func = attr_value.mutable_func(); - func->set_name(value->name); - value->attrs.FillAttrValueMap(func->mutable_attr()); - op->attrs.Set(attr_name, attr_value); + func->set_name(value->operation.Name()); + value->operation.Attrs().FillAttrValueMap(func->mutable_attr()); + op->operation.MutableAttrs()->Set(attr_name, attr_value); } #define TFE_OP_SET_ATTR_LIST(fn, type) \ void fn(TFE_Op* op, const char* attr_name, const type* values, \ int num_values) { \ - op->attrs.Set(attr_name, tensorflow::gtl::ArraySlice( \ - values, num_values)); \ + op->operation.MutableAttrs()->Set( \ + attr_name, \ + tensorflow::gtl::ArraySlice(values, num_values)); \ } TFE_OP_SET_ATTR_LIST(TFE_OpSetAttrStringList, char*) TFE_OP_SET_ATTR_LIST(TFE_OpSetAttrFloatList, float) @@ -359,14 +356,14 @@ TFE_OP_SET_ATTR_LIST(TFE_OpSetAttrFloatList, float) void TFE_OpSetAttrIntList(TFE_Op* op, const char* attr_name, const int64_t* values, int num_values) { - op->attrs.Set(attr_name, - tensorflow::gtl::ArraySlice( - reinterpret_cast(values), num_values)); + op->operation.MutableAttrs()->Set( + attr_name, tensorflow::gtl::ArraySlice( + reinterpret_cast(values), num_values)); } void TFE_OpSetAttrTypeList(TFE_Op* op, const char* attr_name, const TF_DataType* values, int num_values) { - op->attrs.Set( + op->operation.MutableAttrs()->Set( 
attr_name, tensorflow::gtl::ArraySlice( reinterpret_cast(values), num_values)); @@ -378,8 +375,8 @@ void TFE_OpSetAttrBoolList(TFE_Op* op, const char* attr_name, for (int i = 0; i < num_values; ++i) { b[i] = values[i]; } - op->attrs.Set(attr_name, - tensorflow::gtl::ArraySlice(b.get(), num_values)); + op->operation.MutableAttrs()->Set( + attr_name, tensorflow::gtl::ArraySlice(b.get(), num_values)); } void TFE_OpSetAttrShapeList(TFE_Op* op, const char* attr_name, @@ -409,9 +406,9 @@ void TFE_OpSetAttrShapeList(TFE_Op* op, const char* attr_name, } } } - op->attrs.Set(attr_name, - tensorflow::gtl::ArraySlice( - proto.get(), num_values)); + op->operation.MutableAttrs()->Set( + attr_name, tensorflow::gtl::ArraySlice( + proto.get(), num_values)); } void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name, @@ -419,12 +416,12 @@ void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name, std::unique_ptr funcs( new tensorflow::NameAttrList[num_values]); for (int i = 0; i < num_values; i++) { - funcs[i].set_name(value[i]->name); - value[i]->attrs.FillAttrValueMap(funcs[i].mutable_attr()); + funcs[i].set_name(value[i]->operation.Name()); + value[i]->operation.Attrs().FillAttrValueMap(funcs[i].mutable_attr()); } - op->attrs.Set(attr_name, - tensorflow::gtl::ArraySlice( - funcs.get(), num_values)); + op->operation.MutableAttrs()->Set( + attr_name, tensorflow::gtl::ArraySlice( + funcs.get(), num_values)); } } // extern "C" @@ -460,18 +457,19 @@ int StepStatsDeviceIndex(tensorflow::StepStats* step_stats, } tensorflow::Status ValidateInputTypeAndPlacement( - tensorflow::EagerContext* ctx, tensorflow::Device* op_device, TFE_Op* op, - const tensorflow::OpKernel* kernel, tensorflow::RunMetadata* run_metadata) { + tensorflow::EagerContext* ctx, tensorflow::Device* op_device, + tensorflow::EagerOperation* op, const tensorflow::OpKernel* kernel, + tensorflow::RunMetadata* run_metadata) { tensorflow::Device* host_device = ctx->HostCPU(); const tensorflow::MemoryTypeVector& 
memtypes = kernel->input_memory_types(); - if (memtypes.size() != op->inputs.size()) { + if (memtypes.size() != op->Inputs().size()) { return tensorflow::errors::InvalidArgument( - "expected ", memtypes.size(), " inputs, got ", op->inputs.size()); + "expected ", memtypes.size(), " inputs, got ", op->Inputs().size()); } - for (int i = 0; i < op->inputs.size(); ++i) { + for (int i = 0; i < op->Inputs().size(); ++i) { const tensorflow::Device* expected_device = memtypes[i] == tensorflow::HOST_MEMORY ? host_device : op_device; - tensorflow::TensorHandle* handle = op->inputs[i]; + tensorflow::TensorHandle* handle = op->Inputs()[i]; tensorflow::Device* handle_device = nullptr; TF_RETURN_IF_ERROR(handle->Device(&handle_device)); const tensorflow::Device* actual_device = @@ -491,7 +489,7 @@ tensorflow::Status ValidateInputTypeAndPlacement( return tensorflow::errors::InvalidArgument( "Tensors on conflicting devices:" " cannot compute ", - op->name, " as input #", i, " was expected to be on ", + op->Name(), " as input #", i, " was expected to be on ", expected_device->name(), " but is actually on ", actual_device->name(), " (operation running on ", op_device->name(), ")", @@ -502,7 +500,7 @@ tensorflow::Status ValidateInputTypeAndPlacement( "between devices" " may slow down your model"); case tensorflow::DEVICE_PLACEMENT_WARN: - LOG(WARNING) << "before computing " << op->name << " input #" << i + LOG(WARNING) << "before computing " << op->Name() << " input #" << i << " was expected to be on " << expected_device->name() << " but is actually on " << actual_device->name() << " (operation running on " << op_device->name() @@ -534,16 +532,16 @@ tensorflow::Status ValidateInputTypeAndPlacement( if (copied_tensor != nullptr) copied_tensor->Unref(); return tensorflow::errors::Internal( "Failed copying input tensor from ", actual_device->name(), " to ", - expected_device->name(), " in order to run ", op->name, ": ", + expected_device->name(), " in order to run ", op->Name(), ": ", 
status.error_message()); } handle->Unref(); handle = copied_tensor; - op->inputs[i] = copied_tensor; + (*op->MutableInputs())[i] = copied_tensor; } if (handle->dtype != kernel->input_type(i)) { return tensorflow::errors::InvalidArgument( - "cannot compute ", op->name, " as input #", i, + "cannot compute ", op->Name(), " as input #", i, " was expected to be a ", tensorflow::DataTypeString(kernel->input_type(i)), " tensor but is a ", tensorflow::DataTypeString(handle->dtype), @@ -554,9 +552,10 @@ tensorflow::Status ValidateInputTypeAndPlacement( } tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, - TFE_Context* ctx, TF_Status* status) { + tensorflow::EagerContext* ctx, + TF_Status* status) { tensorflow::DeviceSet ds; - for (tensorflow::Device* d : *ctx->context.devices()) { + for (tensorflow::Device* d : *ctx->devices()) { ds.AddDevice(d); } tensorflow::DeviceTypeVector final_devices; @@ -570,7 +569,7 @@ tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, "Could not find valid device for node ", ndef.DebugString()); return nullptr; } - for (tensorflow::Device* d : *ctx->context.devices()) { + for (tensorflow::Device* d : *ctx->devices()) { if (d->device_type() == final_devices[0].type_string()) { return d; } @@ -599,15 +598,16 @@ const tensorflow::FunctionDef* OpToFunction( std::vector* arg_input_types, tensorflow::gtl::FlatMap* op_input_to_func_input, TF_Status* status) { - DCHECK(!op->is_function()); + DCHECK(!op->operation.is_function()); tensorflow::FunctionDef fdef; // Get the OpDef of the op we are trying to encapsulate. - TFE_Context* ctx = op->ctx; + TFE_Context* ctx = op->operation.ctx; const tensorflow::OpRegistrationData* op_data; { - status->status = ctx->context.FindFunctionOpData(op->name, &op_data); + status->status = + ctx->context.FindFunctionOpData(op->operation.Name(), &op_data); if (!status->status.ok()) { return nullptr; } @@ -618,7 +618,8 @@ const tensorflow::FunctionDef* OpToFunction( // Handle constant inputs. 
const std::unordered_set const_inputs( - *tensorflow::XlaOpRegistry::CompileTimeConstantInputs(op->name)); + *tensorflow::XlaOpRegistry::CompileTimeConstantInputs( + op->operation.Name())); // First add place holders for the input args, so that we can refer to them by // position in the next loop. Also tally up the resource inputs. @@ -644,7 +645,7 @@ const tensorflow::FunctionDef* OpToFunction( (*op_input_to_func_input)[i] = const_index; func_input_arg = signature->mutable_input_arg(const_index++); const_input_types->push_back( - static_cast(op->inputs[i]->dtype)); + static_cast(op->operation.Inputs()[i]->dtype)); } else if (op_input_arg.type() == tensorflow::DT_RESOURCE) { VLOG(1) << "For resource input, mapping op input " << i << " to func input " << resource_index; @@ -656,11 +657,11 @@ const tensorflow::FunctionDef* OpToFunction( (*op_input_to_func_input)[i] = arg_index; func_input_arg = signature->mutable_input_arg(arg_index++); arg_input_types->push_back( - static_cast(op->inputs[i]->dtype)); + static_cast(op->operation.Inputs()[i]->dtype)); } func_input_arg->set_name(op_input_arg.name()); - func_input_arg->set_type(op->inputs[i]->dtype); + func_input_arg->set_type(op->operation.Inputs()[i]->dtype); } VLOG(1) << "Added OpDef Inputs: " << fdef.DebugString(); @@ -673,7 +674,8 @@ const tensorflow::FunctionDef* OpToFunction( op_def.name(), func_id_generator.fetch_add(1))); // Add the node def and set its input names to match op_def's names. - const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); + const tensorflow::NodeDef& ndef = + op->operation.MutableAttrs()->BuildNodeDef(); DCHECK_EQ(signature->input_arg_size(), ndef.input_size()); *fdef.add_node_def() = ndef; for (int i = 0; i < op_def.input_arg_size(); ++i) { @@ -713,17 +715,18 @@ const tensorflow::FunctionDef* OpToFunction( // Builds an _XLALaunchOp as a wrapper over 'op', so that 'op' can be executed // via XLA. 
std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { - VLOG(1) << "Creating _XlaLaunchOp for TFE_Op " << op->name; - auto launch_op = - std::unique_ptr(TFE_NewOp(op->ctx, "_XlaLaunch", status)); + VLOG(1) << "Creating _XlaLaunchOp for TFE_Op " << op->operation.Name(); + auto launch_op = std::unique_ptr( + TFE_NewOp(op->operation.ctx, "_XlaLaunch", status)); if (TF_GetCode(status) != TF_OK) return nullptr; - if (op->device) { - TFE_OpSetDevice(launch_op.get(), op->device->name().c_str(), status); + if (op->operation.device) { + TFE_OpSetDevice(launch_op.get(), op->operation.device->name().c_str(), + status); if (TF_GetCode(status) != TF_OK) return nullptr; } const tensorflow::FunctionDef* fdef; - { fdef = op->ctx->context.FindFunctionDef(op->name); } + { fdef = op->operation.ctx->FindFunctionDef(op->operation.Name()); } std::vector const_input_types; std::vector arg_input_types; tensorflow::gtl::FlatMap op_input_to_func_input; @@ -748,20 +751,21 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { // Copy inputs and their devices. // Since input param reordering may have occurred between `op` and `launch_op` // via `op_input_to_func_input`, adjust the actual inputs accordingly. 
- launch_op->inputs = op->inputs; - for (tensorflow::TensorHandle* h : launch_op->inputs) { + *launch_op->operation.MutableInputs() = op->operation.Inputs(); + for (tensorflow::TensorHandle* h : launch_op->operation.Inputs()) { h->Ref(); } if (!op_input_to_func_input.empty()) { - DCHECK_EQ(op->inputs.size(), op_input_to_func_input.size()); + DCHECK_EQ(op->operation.Inputs().size(), op_input_to_func_input.size()); for (int i = 0; i < op_input_to_func_input.size(); ++i) { VLOG(1) << "mapping op input " << i << " to func input " << op_input_to_func_input[i]; - launch_op->inputs[op_input_to_func_input[i]] = op->inputs[i]; + (*launch_op->operation.MuableInputs())[op_input_to_func_input[i]] = + op->operation.Inputs()[i]; } } - launch_op->attrs.NumInputs(op->inputs.size()); + launch_op->operation.MutableAttrs()->NumInputs(op->operation.Inputs().size()); TFE_OpSetAttrTypeList(launch_op.get(), "Tconstants", const_input_types.data(), const_input_types.size()); @@ -796,16 +800,17 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { extern "C" { -void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, +void TFE_Execute(TFE_Op* tfe_op, TFE_TensorHandle** retvals, int* num_retvals, TF_Status* status) { - TFE_Context* ctx = op->ctx; - status->status = ctx->context.GetStatus(); + tensorflow::EagerOperation* op = &tfe_op->operation; + tensorflow::EagerContext* ctx = op->EagerContext(); + status->status = ctx->GetStatus(); if (!status->status.ok()) { return; } #ifdef TENSORFLOW_EAGER_USE_XLA std::unique_ptr xla_launch_op; - if (op->use_xla && op->name != "_XlaLaunch") { + if (op->UseXla() && op->Name() != "_XlaLaunch") { xla_launch_op = BuildXlaLaunch(op, status); if (!status->status.ok()) { return; @@ -816,31 +821,31 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // Ensure all resource-touching ops run in the device the resource is, // regardless of anything else that has been specified. 
This is identical to // the graph mode behavior. - for (int i = 0; i < op->inputs.size(); ++i) { + for (int i = 0; i < op->Inputs().size(); ++i) { tensorflow::Device* input_op_device = nullptr; - status->status = op->inputs[i]->OpDevice(&input_op_device); + status->status = op->Inputs()[i]->OpDevice(&input_op_device); if (!status->status.ok()) return; - VLOG(2) << "for op " << op->name << " input " << i << " " - << tensorflow::DataTypeString(op->inputs[i]->dtype) << " " + VLOG(2) << "for op " << op->Name() << " input " << i << " " + << tensorflow::DataTypeString(op->Inputs()[i]->dtype) << " " << (input_op_device == nullptr ? "cpu" : input_op_device->name()) - << " " << (op->device == nullptr ? "cpu" : op->device->name()); - if (op->inputs[i]->dtype == tensorflow::DT_RESOURCE && - (input_op_device != op->device || input_op_device == nullptr)) { + << " " << (op->Device() == nullptr ? "cpu" : op->Device()->name()); + if (op->Inputs()[i]->dtype == tensorflow::DT_RESOURCE && + (input_op_device != op->Device() || input_op_device == nullptr)) { tensorflow::Device* d = - input_op_device == nullptr ? ctx->context.HostCPU() : input_op_device; - VLOG(1) << "Changing device of operation " << op->name << " to " + input_op_device == nullptr ? ctx->HostCPU() : input_op_device; + VLOG(1) << "Changing device of operation " << op->Name() << " to " << d->name() << " because input #" << i << " is a resource in this device."; - op->device = d; + op->SetDevice(d); } } - tensorflow::Device* device = op->device; + tensorflow::Device* device = op->Device(); - tensorflow::Fprint128 cache_key = - op->attrs.CacheKey(device == nullptr ? "unspecified" : device->name()); - tensorflow::KernelAndDevice* kernel = ctx->context.GetCachedKernel(cache_key); + tensorflow::Fprint128 cache_key = op->MutableAttrs()->CacheKey( + device == nullptr ? 
"unspecified" : device->name()); + tensorflow::KernelAndDevice* kernel = ctx->GetCachedKernel(cache_key); if (kernel == nullptr) { - const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); + const tensorflow::NodeDef& ndef = op->MutableAttrs()->BuildNodeDef(); if (device == nullptr) { device = SelectDevice(ndef, ctx, status); if (!status->status.ok()) { @@ -848,19 +853,19 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } } CHECK(device != nullptr); - if (ctx->context.LogDevicePlacement()) { + if (ctx->LogDevicePlacement()) { LOG(INFO) << "Executing op " << ndef.op() << " in device " << device->name(); } - kernel = new tensorflow::KernelAndDevice(ctx->context.GetRendezvous()); + kernel = new tensorflow::KernelAndDevice(ctx->GetRendezvous()); // Knowledge of the implementation of Init (and in-turn // FunctionLibraryRuntime::CreateKernel) tells us that ctx->func_lib_def // will be accessed, so grab on to the lock. // See WARNING comment in Execute (before kernel->Run) - would be nice to // rework to avoid this subtlety. - tensorflow::tf_shared_lock l(*ctx->context.FunctionsMu()); - status->status = tensorflow::KernelAndDevice::Init( - ndef, ctx->context.func_lib(device), kernel); + tensorflow::tf_shared_lock l(*ctx->FunctionsMu()); + status->status = + tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); if (!status->status.ok()) { delete kernel; return; @@ -868,7 +873,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // Update output_dtypes inside `kernel`. 
const tensorflow::OpDef* op_def = nullptr; const tensorflow::FunctionDef* function_def = - ctx->context.FuncLibDef()->Find(ndef.op()); + ctx->FuncLibDef()->Find(ndef.op()); if (function_def != nullptr) { op_def = &(function_def->signature()); } @@ -884,7 +889,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, if (!status->status.ok()) { return; } - ctx->context.AddKernelToCache(cache_key, kernel); + ctx->AddKernelToCache(cache_key, kernel); } const tensorflow::DataTypeVector& output_dtypes = kernel->output_dtypes(); const int output_dtypes_size = output_dtypes.size(); @@ -903,43 +908,42 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, device = kernel->device(); } status->status = ValidateInputTypeAndPlacement( - &ctx->context, device, op, kernel->kernel(), - ctx->context.ShouldStoreMetadata() ? ctx->context.RunMetadataProto() - : nullptr); + ctx, device, op, kernel->kernel(), + ctx->ShouldStoreMetadata() ? ctx->RunMetadataProto() : nullptr); if (!status->status.ok()) return; std::unique_ptr maybe_stats; - if (ctx->context.ShouldStoreMetadata()) { + if (ctx->ShouldStoreMetadata()) { maybe_stats.reset(new tensorflow::NodeExecStats); - maybe_stats->set_node_name(op->name); + maybe_stats->set_node_name(op->Name()); maybe_stats->set_all_start_micros(tensorflow::Env::Default()->NowMicros()); maybe_stats->set_op_start_rel_micros(0); maybe_stats->set_scheduled_micros(tensorflow::Env::Default()->NowMicros()); // TODO(apassos) track referenced tensors } - if (ctx->context.Async()) { + if (ctx->Async()) { // Note that for async mode, execution order will make sure that all // input handles are ready before executing them. // TODO(agarwal): Consider executing "cheap" kernels inline for performance. 
tensorflow::gtl::InlinedVector handle_retvals( *num_retvals); - tensorflow::uint64 id = op->ctx->context.NextId(); + tensorflow::uint64 id = ctx->NextId(); for (int i = 0; i < *num_retvals; ++i) { tensorflow::TensorHandle* h = - new tensorflow::TensorHandle(id, output_dtypes[i], &op->ctx->context); + new tensorflow::TensorHandle(id, output_dtypes[i], ctx); retvals[i] = new TFE_TensorHandle(h); handle_retvals[i] = h; } tensorflow::EagerNode* node = new tensorflow::ExecuteNode( - id, &op->ctx->context, op->device, op->inputs, kernel, - maybe_stats.release(), output_dtypes, handle_retvals); - ctx->context.ExecutorAdd(node); + id, ctx, op->Device(), op->Inputs(), kernel, maybe_stats.release(), + output_dtypes, handle_retvals); + ctx->ExecutorAdd(node); } else { // Execute checks if retvals[i] is nullptr or not to figure if it needs to // allocate it. tensorflow::gtl::InlinedVector handle_retvals( *num_retvals); status->status = tensorflow::EagerExecute( - &op->ctx->context, op->device, op->inputs, kernel, maybe_stats.get(), + ctx, op->Device(), op->Inputs(), kernel, maybe_stats.get(), handle_retvals.data(), *num_retvals); for (int i = 0; i < *num_retvals; ++i) { retvals[i] = new TFE_TensorHandle(handle_retvals[i]); @@ -1142,9 +1146,3 @@ void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, } } } // namespace tensorflow - -TFE_Op::~TFE_Op() { - for (tensorflow::TensorHandle* h : inputs) { - h->Unref(); - } -} diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 05dc64f521..49e1aab1ce 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -32,6 +32,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" +#include "tensorflow/core/common_runtime/eager/eager_operation.h" #include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/eager/tensor_handle.h" #include "tensorflow/core/common_runtime/function.h" @@ -45,7 +46,6 @@ limitations under the License. #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/public/version.h" - struct TFE_ContextOptions { TF_SessionOptions session_options; // true if async execution is enabled. @@ -85,19 +85,9 @@ struct TFE_Op { // t is NULL iff the TFE_Op corresponds to a TensorFlow function instead of a // primitive operation. TFE_Op(TFE_Context* ctx, const char* op, const tensorflow::AttrTypeMap* t) - : ctx(ctx), name(op), attrs(op), attr_types(t), device(nullptr) {} - - ~TFE_Op(); - - bool const is_function() const { return attr_types == nullptr; } + : operation(&ctx->context, op, t) {} - TFE_Context* ctx; // Must outlive the TFE_Op. 
- const tensorflow::string name; - tensorflow::AttrBuilder attrs; - const tensorflow::AttrTypeMap* attr_types; - tensorflow::gtl::InlinedVector inputs; - tensorflow::Device* device; - bool use_xla = false; + tensorflow::EagerOperation operation; }; namespace tensorflow { diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 941a0e61c7..00ac4a4e47 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -54,6 +54,22 @@ tf_cuda_library( ], ) +tf_cuda_library( + name = "eager_operation", + srcs = [ + "eager_operation.cc", + ], + hdrs = [ + "eager_operation.h", + ], + visibility = ["//tensorflow:internal"], + deps = [ + ":context", + ":tensor_handle", + "//tensorflow/c/eager:runtime", + ], +) + tf_cuda_library( name = "tensor_handle", srcs = [ diff --git a/tensorflow/core/common_runtime/eager/eager_operation.cc b/tensorflow/core/common_runtime/eager/eager_operation.cc new file mode 100644 index 0000000000..381b05ada8 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/eager_operation.cc @@ -0,0 +1,33 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/common_runtime/eager/eager_operation.h" + +namespace tensorflow { +tensorflow::Status EagerOperation::SetDevice(const char* device) { + auto status = Status::OK(); + tensorflow::Device* d = nullptr; + if (device != nullptr && strlen(device) > 0) { + status.Update(ctx_->FindDeviceByName(device, &d)); + } + device_ = d; + return status; +} + +void EagerOperation::AddInput(tensorflow::TensorHandle* h) { + h->Ref(); + inputs_.push_back(h); + attrs_.NumInputs(static_cast(inputs_.size())); +} +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/eager_operation.h b/tensorflow/core/common_runtime/eager/eager_operation.h new file mode 100644 index 0000000000..6b6e53da87 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/eager_operation.h @@ -0,0 +1,74 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EAGER_OPERATION_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EAGER_OPERATION_H_ + +#include "tensorflow/c/eager/runtime.h" +#include "tensorflow/core/common_runtime/eager/context.h" +#include "tensorflow/core/common_runtime/eager/tensor_handle.h" + +namespace tensorflow { +class EagerOperation { + public: + // t is NULL iff the EagerOperation corresponds to a TensorFlow function + // instead of a primitive operation. + EagerOperation(tensorflow::EagerContext* ctx, const char* op, + const tensorflow::AttrTypeMap* t) + : ctx_(ctx), name_(op), attrs_(op), attr_types_(t), device_(nullptr) {} + + ~EagerOperation() { + for (tensorflow::TensorHandle* h : inputs_) { + h->Unref(); + } + } + + bool is_function() const { return attr_types_ == nullptr; } + + tensorflow::EagerContext* EagerContext() { return ctx_; } + + tensorflow::AttrBuilder* MutableAttrs() { return &attrs_; } + const tensorflow::AttrBuilder& Attrs() const { return attrs_; } + + const tensorflow::gtl::InlinedVector& Inputs() + const { + return inputs_; + } + tensorflow::gtl::InlinedVector* + MutableInputs() { + return &inputs_; + } + void AddInput(tensorflow::TensorHandle* h); + + const tensorflow::string& Name() const { return name_; } + const tensorflow::AttrTypeMap* AttrTypes() const { return attr_types_; } + + tensorflow::Device* Device() const { return device_; } + tensorflow::Status SetDevice(const char* device); + void SetDevice(tensorflow::Device* device) { device_ = device; } + + void SetUseXla(bool use_xla) { use_xla_ = use_xla; } + + private: + tensorflow::EagerContext* ctx_; // Must outlive the EagerOperation. 
+ const tensorflow::string name_; + tensorflow::AttrBuilder attrs_; + const tensorflow::AttrTypeMap* attr_types_; + tensorflow::gtl::InlinedVector inputs_; + tensorflow::Device* device_; + bool use_xla_ = false; +}; +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EAGER_OPERATION_H_ -- GitLab From 2b0b015ebb1c33a409836bd1c9c98124dfd841ec Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 11:43:48 -0700 Subject: [PATCH 084/434] [XLA] Fix a bug in ToProto: don't add gather attributes twice. PiperOrigin-RevId: 193699745 --- tensorflow/compiler/xla/service/hlo_instruction.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index a638d54d85..a714d0e114 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2451,12 +2451,6 @@ HloInstructionProto HloInstruction::ToProto() const { proto.add_fft_length(fft_len); } - if (gather_dimension_numbers_ != nullptr) { - *proto.mutable_gather_dimension_numbers() = *gather_dimension_numbers_; - } - for (int64 bound : gather_window_bounds_) { - proto.add_gather_window_bounds(bound); - } proto.set_channel_name(channel_name_); proto.set_cost_estimate_ns(cost_estimate_ns_); -- GitLab From 0074dffd076e0faf4da5913aebfa594ef925d6c7 Mon Sep 17 00:00:00 2001 From: Anna R Date: Fri, 20 Apr 2018 12:01:21 -0700 Subject: [PATCH 085/434] Prefix compat import with underscore in meta_graph_transform.py so that it doesn't get exported as part of API: https://www.tensorflow.org/versions/r1.8/api_docs/python/tf/contrib/meta_graph_transform/meta_graph_transform PiperOrigin-RevId: 193702570 --- .../meta_graph_transform/meta_graph_transform.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py 
b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py index ff88b4fa84..4090c1ff3e 100644 --- a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py +++ b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py @@ -30,7 +30,7 @@ from tensorflow.python.framework import importer as _importer from tensorflow.python.framework import ops as _ops from tensorflow.python.saved_model import constants as _saved_model_constants from tensorflow.python.training import saver as _saver_lib -from tensorflow.python.util import compat +from tensorflow.python.util import compat as _compat from tensorflow.tools import graph_transforms as _graph_transforms @@ -161,7 +161,7 @@ def _clean_save_and_restore(graph_def, op, removed_op_names): shapes = [] dtypes = [] for index, value in enumerate(name_op_value_tensor.string_val): - if not _is_removed(compat.as_str(value), removed_op_names): + if not _is_removed(_compat.as_str(value), removed_op_names): names.append(value) shapes.append(shape_op_value_tensor.string_val[index]) dtypes.append(op.attr['dtypes'].list.type[index]) @@ -651,7 +651,7 @@ def _is_removed_mentioned(s, removed_op_names): # /foo/bar. This regex ensures that we handle these two nodes # as separate entities. It matches on nodes having names in the form of # '/foo/bar_x' as well as nodes having names in the form of 'foo.' 
- s_names = _re.findall(r'((?:[\/]?[a-zA-Z0-9\_]*)*)', compat.as_str_any(s)) + s_names = _re.findall(r'((?:[\/]?[a-zA-Z0-9\_]*)*)', _compat.as_str_any(s)) for removed_op_name in removed_op_names: for s_name in s_names: if s_name.endswith(removed_op_name): @@ -737,9 +737,9 @@ def meta_graph_transform( for tag in tags: meta_graph_def.meta_info_def.tags.append(tag) - base_op_names = [compat.as_str(node.name) + base_op_names = [_compat.as_str(node.name) for node in base_meta_graph_def.graph_def.node] - retained_op_names = [compat.as_str(node.name) + retained_op_names = [_compat.as_str(node.name) for node in meta_graph_def.graph_def.node] removed_op_names = set(base_op_names) - set(retained_op_names) -- GitLab From 1b5839e6acad5d360ea9e5b94226b30047924cb9 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Fri, 20 Apr 2018 12:02:56 -0700 Subject: [PATCH 086/434] [TF:XLA] Now that the compiler no longer introduces implicit broadcasts, forbid them in the HLO verifier. PiperOrigin-RevId: 193702874 --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_verifier.cc | 21 ++++++++ .../compiler/xla/service/hlo_verifier.h | 4 ++ .../xla/service/reshape_mover_test.cc | 51 ------------------- 4 files changed, 26 insertions(+), 51 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 9009cbf845..9555d91817 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2032,6 +2032,7 @@ cc_library( srcs = ["hlo_verifier.cc"], hdrs = ["hlo_verifier.h"], deps = [ + ":hlo", ":hlo_pass", ":shape_inference", "//tensorflow/compiler/xla:status_macros", diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 80ed6d6832..8a30cbf9cd 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include +#include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_verifier.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/core/lib/core/errors.h" @@ -780,6 +781,24 @@ Status HloVerifier::CheckWhileInstruction(HloInstruction* instruction) { return tensorflow::Status::OK(); } +Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) { + const Shape& out_shape = instruction->shape(); + for (HloInstruction* operand : instruction->operands()) { + const Shape& operand_shape = operand->shape(); + if (!ShapeUtil::IsScalar(operand_shape) && + !ShapeUtil::CompatibleIgnoringElementType(operand_shape, out_shape)) { + return FailedPrecondition( + "Implicit broadcast is not allowed in HLO." + "Found non-compatible shapes for instruction %s.\n" + "output: %s\noperand: %s\n", + HloOpcodeString(instruction->opcode()).c_str(), + ShapeUtil::HumanString(out_shape).c_str(), + ShapeUtil::HumanString(operand_shape).c_str()); + } + } + return tensorflow::Status::OK(); +} + StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(VerifyHloStructure(module)); @@ -821,6 +840,8 @@ StatusOr HloVerifier::Run(HloModule* module) { << " != " << ShapeUtil::Rank(instruction->operand(0)->shape()); } else if (instruction->opcode() == HloOpcode::kWhile) { TF_RETURN_IF_ERROR(CheckWhileInstruction(instruction)); + } else if (instruction->IsElementwise()) { + TF_RETURN_IF_ERROR(CheckElementwiseInstruction(instruction)); } auto previous = instructions.find(instruction->name()); diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h index 1ec55a9bdc..6208887547 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.h +++ b/tensorflow/compiler/xla/service/hlo_verifier.h @@ -146,6 +146,10 @@ class HloVerifier : public HloPassInterface { Status CheckWhileInstruction(HloInstruction* instruction); + // Checks that the non-scalar operand shapes are 
compatible to the output + // shape, i.e., that there are no implicit broadcasts of size-one dimensions. + Status CheckElementwiseInstruction(HloInstruction* instruction); + // Creates a ShapeVerifier that checks that shapes match inferred // expectations. This is a factory function because ShapeVerifier, // being a DfsHloVisitor, is stateful. We want a clean object diff --git a/tensorflow/compiler/xla/service/reshape_mover_test.cc b/tensorflow/compiler/xla/service/reshape_mover_test.cc index 094f7319f4..13e2d3258e 100644 --- a/tensorflow/compiler/xla/service/reshape_mover_test.cc +++ b/tensorflow/compiler/xla/service/reshape_mover_test.cc @@ -458,57 +458,6 @@ TEST_F(ReshapeMoverTest, ScalarReshapeNotMovedAcrossSelect) { EXPECT_EQ(select, computation->root_instruction()); } -// Tree looks like: -// -// param0 [1,128,1] -// | -// reshape [128,1] constant [128,1024] -// \ / -// multiply w/implicit broadcast [128,1024] -// -// The reshape mover would like to sink the reshape below the multiply. -// -// Previously we would attempt to insert a reshape of the constant to [1,128,1] -// (which is unsound, because it has a different number of elements) as -// preparation for sinking the reshape. -// -// To eliminate the unsoundness, we outlaw reshape sinking when one of the -// operands is implicitly broadcast in the elementwise consumer. -// -// TODO(b/37799338) However, it would be possible in this case to do a more -// in-depth analysis to get reshape movement to occur: -// -// 1. Note that the broadcast dimension (logical dimension 1) in the operands -// would map back to logical dimension 2 in the param0 node. -// 2. Match rank of the constant to the param0 node (by prepending a trivial 1 -// dimension). -// 3. Reshape to [128,1024] at the root. -// -// But this is not currently done. 
-TEST_F(ReshapeMoverTest, ImplicitlyBroadcastReshapeIsNotMovedBug37787999) { - HloComputation::Builder builder(TestName()); - auto param0 = builder.AddInstruction(HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(F32, {1, 128, 1}), "param0")); - auto reshape = builder.AddInstruction(HloInstruction::CreateReshape( - ShapeUtil::MakeShape(F32, {128, 1}), param0)); - Array2D a(128, 1024); - auto literal = Literal::CreateR2FromArray2D(a); - auto constant = builder.AddInstruction( - HloInstruction::CreateConstant(std::move(literal))); - auto multiply = builder.AddInstruction(HloInstruction::CreateBinary( - constant->shape(), HloOpcode::kMultiply, constant, reshape)); - - auto computation = module().AddEntryComputation(builder.Build()); - EXPECT_THAT(computation->root_instruction(), - op::Multiply(op::Constant(), op::Reshape(param0))); - - EXPECT_FALSE(ReshapeMover().Run(&module()).ValueOrDie()); - - EXPECT_THAT(computation->root_instruction(), - op::Multiply(op::Constant(), op::Reshape(param0))); - EXPECT_EQ(multiply, computation->root_instruction()); -} - // Tree looks like this: // // add1 -- GitLab From ceed923d600584ade8d159271422b4a08f728cbb Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Fri, 20 Apr 2018 12:05:11 -0700 Subject: [PATCH 087/434] Add native dilated support for conv3d and its gradients in cudnn v>=6. 
PiperOrigin-RevId: 193703316 --- tensorflow/core/framework/common_shape_fns.cc | 32 ++- .../core/framework/common_shape_fns_test.cc | 55 ++++- tensorflow/core/kernels/conv_grad_ops_3d.cc | 115 +++++++++- tensorflow/core/kernels/conv_ops_3d.cc | 52 ++++- tensorflow/core/ops/nn_ops.cc | 2 + .../python/kernel_tests/conv_ops_3d_test.py | 196 +++++++++++++++++- tensorflow/python/ops/nn_grad.py | 6 + 7 files changed, 426 insertions(+), 32 deletions(-) diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index 72eeda7a43..0916c9b7a8 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -487,6 +487,15 @@ Status Conv3DShape(shape_inference::InferenceContext* c) { string data_format; Status s = c->GetAttr("data_format", &data_format); + std::vector dilations; + TF_RETURN_IF_ERROR(c->GetAttr("dilations", &dilations)); + + if (dilations.size() != 5) { + return errors::InvalidArgument( + "Conv3D requires the dilation attribute to contain 5 values, but got: ", + dilations.size()); + } + std::vector strides; TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides)); if (strides.size() != 5) { @@ -496,6 +505,7 @@ Status Conv3DShape(shape_inference::InferenceContext* c) { } int32 stride_planes, stride_rows, stride_cols; + int32 dilation_planes, dilation_rows, dilation_cols; if (s.ok() && data_format == "NCDHW") { // Convert input_shape to NDHWC. 
auto dim = [&](char dimension) { @@ -506,10 +516,16 @@ Status Conv3DShape(shape_inference::InferenceContext* c) { stride_planes = strides[2]; stride_rows = strides[3]; stride_cols = strides[4]; + dilation_planes = dilations[2]; + dilation_cols = dilations[3]; + dilation_rows = dilations[4]; } else { stride_planes = strides[1]; stride_rows = strides[2]; stride_cols = strides[3]; + dilation_planes = dilations[1]; + dilation_cols = dilations[2]; + dilation_rows = dilations[3]; } DimensionHandle batch_size_dim = c->Dim(input_shape, 0); @@ -530,13 +546,15 @@ Status Conv3DShape(shape_inference::InferenceContext* c) { TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding)); DimensionHandle output_planes, output_rows, output_cols; - TF_RETURN_IF_ERROR( - GetWindowedOutputSizeFromDims(c, in_planes_dim, filter_planes_dim, - stride_planes, padding, &output_planes)); - TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims( - c, in_rows_dim, filter_rows_dim, stride_rows, padding, &output_rows)); - TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims( - c, in_cols_dim, filter_cols_dim, stride_cols, padding, &output_cols)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2( + c, in_planes_dim, filter_planes_dim, dilation_planes, stride_planes, + padding, &output_planes)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2( + c, in_rows_dim, filter_rows_dim, dilation_rows, stride_rows, padding, + &output_rows)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2( + c, in_cols_dim, filter_cols_dim, dilation_cols, stride_cols, padding, + &output_cols)); ShapeHandle output_shape; if (data_format == "NCDHW") { diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc index 13d429b895..919e0967c0 100644 --- a/tensorflow/core/framework/common_shape_fns_test.cc +++ b/tensorflow/core/framework/common_shape_fns_test.cc @@ -644,15 +644,19 @@ TEST(CommonShapeFnsTest, Conv3DShapeTest) { .Finalize(&op.node_def)); }; - // 1x1x1 filter - 
set_op({{1, 1, 1, 1, 1}}, "VALID"); - INFER_OK(op, "[1,2,2,2,1];[1,1,1,1,1]", "[d0_0,2,2,2,d1_4]"); - // Invalid rank for input INFER_ERROR("must be rank 5", op, "[4,4];[2,1,1,1]"); // Invalid rank for filter INFER_ERROR("must be rank 5", op, "[1,4,4,1];[2,1,1]"); + // Invalid value for strides + set_op({{1, 1, 1, 0, 1}}, "VALID"); + INFER_ERROR("must be > 0", op, "[1,2,2,2,1];[1,1,1,1,1]"); + + // 1x1x1 filter + set_op({{1, 1, 1, 1, 1}}, "VALID"); + INFER_OK(op, "[1,2,2,2,1];[1,1,1,1,1]", "[d0_0,2,2,2,d1_4]"); + // unknown dims in the critical fields give partial inference. INFER_OK(op, "[1,2,2,2,1];[1,1,1,1,1]", "[d0_0,2,2,2,d1_4]"); INFER_OK(op, "[1,?,2,2,1];[1,1,1,1,1]", "[d0_0,?,2,2,d1_4]"); @@ -712,6 +716,49 @@ TEST(CommonShapeFnsTest, Conv3DShapeTest) { INFER_OK(op, "[1,4,9,4,1];[2,2,2,1,?]", "[d0_0,2,3,1,d1_4]"); } +TEST(CommonShapeFnsTest, Conv3DDilatedShapeTest) { + ShapeInferenceTestOp op("Conv3D"); + auto set_op = [&op](const std::vector& dilations, + const std::vector& strides, + const string& padding) { + TF_CHECK_OK(NodeDefBuilder("test", "Conv3D") + .Input("input", 0, DT_FLOAT) + .Input("filter", 0, DT_FLOAT) + .Attr("dilations", dilations) + .Attr("strides", strides) + .Attr("padding", padding) + .Finalize(&op.node_def)); + }; + + // Invalid rank for dilation + set_op({{1, 2, 1, 1}}, {{1, 1, 1, 1, 1}}, "VALID"); + INFER_ERROR("contain 5 values", op, "[1,2,2,2,1];[1,1,1,1,1]"); + + // Invalid value for dilation + set_op({{1, 2, 0, 1, 1}}, {{1, 1, 1, 1, 1}}, "VALID"); + INFER_ERROR("must be >= 1", op, "[1,2,2,2,1];[1,1,1,1,1]"); + + // 2x1x1 dilation 1x1x1 filter + set_op({{1, 2, 1, 1, 1}}, {{1, 1, 1, 1, 1}}, "VALID"); + INFER_OK(op, "[1,2,2,2,1];[1,1,1,1,1]", "[d0_0,2,2,2,d1_4]"); + + // 2x1x1 dilation 2x2x2 filter + set_op({{1, 2, 1, 1, 1}}, {{1, 1, 1, 1, 1}}, "VALID"); + INFER_OK(op, "[1,3,2,2,1];[2,2,2,1,1]", "[d0_0,1,1,1,d1_4]"); + + // 2x1x1 dilation 3x3x3 input, 1x1x1 filter, 2x2x2 stride + set_op({{1, 2, 1, 1, 1}}, {{1, 2, 2, 2, 1}}, 
"VALID"); + INFER_OK(op, "[1,3,3,3,1];[1,1,1,1,1]", "[d0_0,2,2,2,d1_4]"); + + // 2x1x1 dilation 3x3x3 input, 1x1x1 filter, 2x1x1 stride + set_op({{1, 2, 1, 1, 1}}, {{1, 2, 1, 1, 1}}, "VALID"); + INFER_OK(op, "[1,3,3,3,1];[1,1,1,1,1]", "[d0_0,2,3,3,d1_4]"); + + // 2x1x1 dilation 4x4x4 input, 2x2x2 filter, 1x1x1 stride + set_op({{1, 2, 1, 1, 1}}, {{1, 1, 1, 1, 1}}, "SAME"); + INFER_OK(op, "[1,4,4,4,1];[2,2,2,1,1]", "[d0_0,d0_1,d0_2,d0_3,d1_4]"); +} + TEST(CommonShapeFnsTest, DepthwiseConv2DShapeTest) { ShapeInferenceTestOp op("DepthwiseConv2dNative"); std::vector strides = {{1, 1, 1, 1}}; diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 1234997bc5..092e859a5b 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -79,13 +79,18 @@ typedef Eigen::GpuDevice GPUDevice; context, out_depth == GetTensorDim(out_backprop, data_format_, 'C'), \ errors::InvalidArgument( \ label, ": filter and out_backprop must have the same out_depth")); \ + const std::array dilations = { \ + {GetTensorDim(dilation_, data_format_, '0'), \ + GetTensorDim(dilation_, data_format_, '1'), \ + GetTensorDim(dilation_, data_format_, '2')}}; \ const std::array strides = { \ {GetTensorDim(stride_, data_format_, '0'), \ GetTensorDim(stride_, data_format_, '1'), \ GetTensorDim(stride_, data_format_, '2')}}; \ std::array out, padding; \ - OP_REQUIRES_OK(context, Get3dOutputSize(input_size, filter_size, strides, \ - padding_, &out, &padding)); \ + OP_REQUIRES_OK( \ + context, Get3dOutputSizeV2(input_size, filter_size, dilations, strides, \ + padding_, &out, &padding)); \ OP_REQUIRES(context, output_planes == out[0], \ errors::InvalidArgument( \ label, \ @@ -151,6 +156,26 @@ class Conv3DBackpropInputOp : public OpKernel { "Conv3DBackpropInputOpV2 only supports NDHWC on the CPU.")); } + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_)); + OP_REQUIRES(context, dilation_.size() == 
5, + errors::InvalidArgument("Dilation rates field must " + "specify 5 dimensions")); + OP_REQUIRES(context, + (GetTensorDim(dilation_, data_format_, 'C') == 1 && + GetTensorDim(dilation_, data_format_, 'N') == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilation rates in the batch and depth dimensions.")); + + // TODO(yangzihao): Add CPU version of dilated conv 3D. + OP_REQUIRES(context, + (GetTensorDim(dilation_, data_format_, '0') == 1 && + GetTensorDim(dilation_, data_format_, '1') == 1 && + GetTensorDim(dilation_, data_format_, '2') == 1), + errors::InvalidArgument( + "Current CPU implementation does not yet support " + "dilation rates larger than 1.")); + OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); OP_REQUIRES(context, stride_.size() == 5, errors::InvalidArgument("Sliding window strides field must " @@ -223,6 +248,7 @@ class Conv3DBackpropInputOp : public OpKernel { } private: + std::vector dilation_; std::vector stride_; Padding padding_; TensorFormat data_format_; @@ -261,6 +287,26 @@ class Conv3DBackpropFilterOp : public OpKernel { "Conv3DBackpropFilterOpV2 only supports NDHWC on the CPU.")); } + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_)); + OP_REQUIRES(context, dilation_.size() == 5, + errors::InvalidArgument("Dilation rates field must " + "specify 5 dimensions")); + OP_REQUIRES(context, + (GetTensorDim(dilation_, data_format_, 'C') == 1 && + GetTensorDim(dilation_, data_format_, 'N') == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilation rates in the batch and depth dimensions.")); + + // TODO(yangzihao): Add CPU version of dilated conv 3D. 
+ OP_REQUIRES(context, + (GetTensorDim(dilation_, data_format_, '0') == 1 && + GetTensorDim(dilation_, data_format_, '1') == 1 && + GetTensorDim(dilation_, data_format_, '2') == 1), + errors::InvalidArgument( + "Current CPU implementation does not yet support " + "dilation rates larger than 1.")); + OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); OP_REQUIRES(context, stride_.size() == 5, errors::InvalidArgument("Sliding window strides field must " @@ -370,6 +416,7 @@ class Conv3DBackpropFilterOp : public OpKernel { } private: + std::vector dilation_; std::vector stride_; Padding padding_; TensorFormat data_format_; @@ -438,6 +485,22 @@ class Conv3DBackpropInputOp : public OpKernel { OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); } + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_)); + OP_REQUIRES(context, dilation_.size() == 5, + errors::InvalidArgument("Dilation rates field must " + "specify 5 dimensions")); + OP_REQUIRES(context, + (GetTensorDim(dilation_, data_format_, 'C') == 1 && + GetTensorDim(dilation_, data_format_, 'N') == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilation rates in the batch and depth dimensions.")); + OP_REQUIRES( + context, + (GetTensorDim(dilation_, data_format_, '0') > 0 && + GetTensorDim(dilation_, data_format_, '1') > 0 && + GetTensorDim(dilation_, data_format_, '2') > 0), + errors::InvalidArgument("Dilated rates should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); OP_REQUIRES(context, stride_.size() == 5, errors::InvalidArgument("Sliding window strides field must " @@ -448,6 +511,12 @@ class Conv3DBackpropInputOp : public OpKernel { GetTensorDim(stride_, data_format_, 'N') == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES( + context, + (GetTensorDim(stride_, 
data_format_, '0') > 0 && + GetTensorDim(stride_, data_format_, '1') > 0 && + GetTensorDim(stride_, data_format_, '2') > 0), + errors::InvalidArgument("Spatial strides should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); cudnn_use_autotune_ = CudnnUseAutotune(); } @@ -471,6 +540,7 @@ class Conv3DBackpropInputOp : public OpKernel { OP_REQUIRES(context, stream, errors::Internal("No GPU stream available.")); if (filter_size[0] == 1 && filter_size[1] == 1 && filter_size[2] == 1 && + dilation_[0] == 1 && dilation_[1] == 1 && dilation_[2] == 1 && stride_[0] == 1 && stride_[1] == 1 && stride_[2] == 1 && data_format_ == FORMAT_NHWC) { const uint64 m = batch * input_size[0] * input_size[1] * input_size[2]; @@ -580,7 +650,10 @@ class Conv3DBackpropInputOp : public OpKernel { .set_input_feature_map_count(in_depth) .set_output_feature_map_count(out_depth); perftools::gputools::dnn::ConvolutionDescriptor conv_desc(3); - conv_desc.set_filter_stride(DimIndex::X, strides[2]) + conv_desc.set_dilation_rate(DimIndex::X, dilations[2]) + .set_dilation_rate(DimIndex::Y, dilations[1]) + .set_dilation_rate(DimIndex::Z, dilations[0]) + .set_filter_stride(DimIndex::X, strides[2]) .set_filter_stride(DimIndex::Y, strides[1]) .set_filter_stride(DimIndex::Z, strides[0]) .set_zero_padding(DimIndex::X, padding_cols / 2) @@ -645,9 +718,7 @@ class Conv3DBackpropInputOp : public OpKernel { {{input_size[0], input_size[1], input_size[2]}}, out_depth, {{filter_size[0], filter_size[1], filter_size[2]}}, - // TODO(yangzihao): Send in arbitrary dilation rates after the dilated - // conv is supported. 
- /*dilation=*/{{1, 1, 1}}, + {{dilations[0], dilations[1], dilations[2]}}, {{strides[0], strides[1], strides[2]}}, {{padding_planes, padding_rows, padding_cols}}, dtype, @@ -755,6 +826,7 @@ class Conv3DBackpropInputOp : public OpKernel { } private: + std::vector dilation_; std::vector stride_; Padding padding_; TensorFormat data_format_; @@ -784,6 +856,22 @@ class Conv3DBackpropFilterOp : public OpKernel { OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); } + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_)); + OP_REQUIRES(context, dilation_.size() == 5, + errors::InvalidArgument("Dilation rates field must " + "specify 5 dimensions")); + OP_REQUIRES(context, + (GetTensorDim(dilation_, data_format_, 'C') == 1 && + GetTensorDim(dilation_, data_format_, 'N') == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilation rates in the batch and depth dimensions.")); + OP_REQUIRES( + context, + (GetTensorDim(dilation_, data_format_, '0') > 0 && + GetTensorDim(dilation_, data_format_, '1') > 0 && + GetTensorDim(dilation_, data_format_, '2') > 0), + errors::InvalidArgument("Dilated rates should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); OP_REQUIRES(context, stride_.size() == 5, errors::InvalidArgument("Sliding window strides field must " @@ -794,6 +882,12 @@ class Conv3DBackpropFilterOp : public OpKernel { GetTensorDim(stride_, data_format_, 'N') == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES( + context, + (GetTensorDim(stride_, data_format_, '0') > 0 && + GetTensorDim(stride_, data_format_, '1') > 0 && + GetTensorDim(stride_, data_format_, '2') > 0), + errors::InvalidArgument("Spatial strides should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); cudnn_use_autotune_ = 
CudnnUseAutotune(); } @@ -820,6 +914,7 @@ class Conv3DBackpropFilterOp : public OpKernel { OP_REQUIRES(context, stream, errors::Internal("No GPU stream available.")); if (filter_size[1] == 1 && filter_size[2] == 1 && filter_size[0] == 1 && + dilations[2] == 1 && dilations[1] == 1 && dilations[0] == 1 && strides[2] == 1 && strides[1] == 1 && strides[0] == 1 && data_format_ == FORMAT_NHWC) { const uint64 m = in_depth; @@ -943,7 +1038,10 @@ class Conv3DBackpropFilterOp : public OpKernel { .set_input_feature_map_count(in_depth) .set_output_feature_map_count(out_depth); perftools::gputools::dnn::ConvolutionDescriptor conv_desc(3); - conv_desc.set_filter_stride(DimIndex::X, strides[2]) + conv_desc.set_dilation_rate(DimIndex::X, dilations[2]) + .set_dilation_rate(DimIndex::Y, dilations[1]) + .set_dilation_rate(DimIndex::Z, dilations[0]) + .set_filter_stride(DimIndex::X, strides[2]) .set_filter_stride(DimIndex::Y, strides[1]) .set_filter_stride(DimIndex::Z, strides[0]) .set_zero_padding(DimIndex::X, padding_cols / 2) @@ -1016,7 +1114,7 @@ class Conv3DBackpropFilterOp : public OpKernel { {{input_size[0], input_size[1], input_size[2]}}, out_depth, {{filter_size[0], filter_size[1], filter_size[2]}}, - {{1, 1, 1}}, + {{dilations[0], dilations[1], dilations[2]}}, {{strides[0], strides[1], strides[2]}}, {{padding_planes, padding_rows, padding_cols}}, dtype, @@ -1102,6 +1200,7 @@ class Conv3DBackpropFilterOp : public OpKernel { } private: + std::vector dilation_; std::vector stride_; Padding padding_; TensorFormat data_format_; diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 0b7c1524e6..48dd3c9eb0 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -49,12 +49,18 @@ template struct LaunchConvOp { static void launch(OpKernelContext* context, bool cudnn_use_autotune, const Tensor& input, const Tensor& filter, + const std::array& dilations, const std::array& strides, const Padding 
padding, TensorFormat data_format, Tensor* output) { OP_REQUIRES(context, data_format == FORMAT_NHWC, errors::InvalidArgument("CPU implementation of Conv3D " "currently only supports the NHWC " "tensor format.")); + OP_REQUIRES(context, + dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1, + errors::InvalidArgument("CPU implementation of Conv3D " + "currently only supports dilated rates " + "of 1.")); functor::CuboidConvolution()( context->eigen_device(), output->tensor(), input.tensor(), filter.tensor(), strides[2], strides[1], @@ -80,6 +86,28 @@ class Conv3DOp : public BinaryOp { GetTensorDim(stride_, data_format_, 'C') == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); + OP_REQUIRES( + context, + (GetTensorDim(stride_, data_format_, '0') > 0 && + GetTensorDim(stride_, data_format_, '1') > 0 && + GetTensorDim(stride_, data_format_, '2') > 0), + errors::InvalidArgument("Spatial strides should be larger than 0.")); + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_)); + OP_REQUIRES(context, dilation_.size() == 5, + errors::InvalidArgument("Dilation rates field must " + "specify 5 dimensions")); + OP_REQUIRES(context, + (GetTensorDim(dilation_, data_format_, 'N') == 1 && + GetTensorDim(dilation_, data_format_, 'C') == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilation rates in the batch and depth dimensions.")); + OP_REQUIRES( + context, + (GetTensorDim(dilation_, data_format_, '0') > 0 && + GetTensorDim(dilation_, data_format_, '1') > 0 && + GetTensorDim(dilation_, data_format_, '2') > 0), + errors::InvalidArgument("Dilated rates should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); cudnn_use_autotune_ = CudnnUseAutotune(); } @@ -115,13 +143,18 @@ class Conv3DOp : public BinaryOp { GetTensorDim(input, data_format_, '2')}}; std::array filter_size = { {filter.dim_size(0), 
filter.dim_size(1), filter.dim_size(2)}}; + std::array dilations = { + {GetTensorDim(dilation_, data_format_, '0'), + GetTensorDim(dilation_, data_format_, '1'), + GetTensorDim(dilation_, data_format_, '2')}}; std::array strides = {{GetTensorDim(stride_, data_format_, '0'), GetTensorDim(stride_, data_format_, '1'), GetTensorDim(stride_, data_format_, '2')}}; std::array out, padding; - OP_REQUIRES_OK(context, Get3dOutputSize(input_size, filter_size, strides, - padding_, &out, &padding)); + OP_REQUIRES_OK( + context, Get3dOutputSizeV2(input_size, filter_size, dilations, strides, + padding_, &out, &padding)); TensorShape out_shape = ShapeFromFormat( data_format_, in_batch, {{out[0], out[1], out[2]}}, out_depth); Tensor* output; @@ -131,10 +164,12 @@ class Conv3DOp : public BinaryOp { if (out_shape.num_elements() == 0) return; LaunchConvOp::launch(context, cudnn_use_autotune_, input, filter, - strides, padding_, data_format_, output); + dilations, strides, padding_, data_format_, + output); } private: + std::vector dilation_; std::vector stride_; Padding padding_; TensorFormat data_format_; @@ -165,6 +200,7 @@ template struct LaunchConvOp { static void launch(OpKernelContext* ctx, bool cudnn_use_autotune, const Tensor& input_param, const Tensor& filter, + const std::array& dilations, const std::array& strides, const Padding padding, TensorFormat data_format, Tensor* output) { auto* stream = ctx->op_device_context()->stream(); @@ -199,6 +235,7 @@ struct LaunchConvOp { // NOTE: This only works in NHWC. if (filter_planes == 1 && filter_rows == 1 && filter_cols == 1 && + dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1 && strides[0] == 1 && strides[1] == 1 && strides[2] == 1 && data_format == FORMAT_NHWC) { // 1x1 filter, so call cublas directly. 
@@ -330,7 +367,10 @@ struct LaunchConvOp { .set_input_feature_map_count(in_depth) .set_output_feature_map_count(out_depth); perftools::gputools::dnn::ConvolutionDescriptor conv_desc(3); - conv_desc.set_filter_stride(DimIndex::X, strides[2]) + conv_desc.set_dilation_rate(DimIndex::X, dilations[2]) + .set_dilation_rate(DimIndex::Y, dilations[1]) + .set_dilation_rate(DimIndex::Z, dilations[0]) + .set_filter_stride(DimIndex::X, strides[2]) .set_filter_stride(DimIndex::Y, strides[1]) .set_filter_stride(DimIndex::Z, strides[0]) .set_zero_padding(DimIndex::X, pad_cols / 2) @@ -377,9 +417,7 @@ struct LaunchConvOp { {{in_planes, in_rows, in_cols}}, out_depth, {{filter_planes, filter_rows, filter_cols}}, - // TODO(yangzihao): Send in arbitrary dilation rates after the dilated - // conv is supported. - /*dilation=*/{{1, 1, 1}}, + {{dilations[0], dilations[1], dilations[2]}}, {{strides[0], strides[1], strides[2]}}, {{pad_planes, pad_rows, pad_cols}}, dtype, diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 12d6dc5eaf..6dc3d9df31 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -524,6 +524,7 @@ REGISTER_OP("Conv3DBackpropInput") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Deprecated(10, "Use Conv3DBackpropInputV2") + .Attr("dilations: list(int) = [1, 1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { return UnchangedShapeWithRank(c, 5); }); @@ -537,6 +538,7 @@ REGISTER_OP("Conv3DBackpropFilter") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Deprecated(10, "Use Conv3DBackpropFilterV2") + .Attr("dilations: list(int) = [1, 1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle out; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 5, &out)); diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index f4616fd661..0b531125f3 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ 
b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn_ops import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test @@ -61,18 +62,18 @@ class Conv3DTest(test.TestCase): def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride, padding, data_format, dtype, use_gpu): - total_size_1 = 1 - total_size_2 = 1 + total_size_tensor = 1 + total_size_filter = 1 for s in tensor_in_sizes: - total_size_1 *= s + total_size_tensor *= s for s in filter_in_sizes: - total_size_2 *= s + total_size_filter *= s # Initializes the input tensor with array containing numbers from 0 to 1. # We keep the input tensor values fairly small to avoid overflowing float16 # during the conv3d. - x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)] - x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)] + x1 = [f * 1.0 / total_size_tensor for f in range(1, total_size_tensor + 1)] + x2 = [f * 1.0 / total_size_filter for f in range(1, total_size_filter + 1)] with self.test_session(use_gpu=use_gpu): t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) @@ -118,6 +119,79 @@ class Conv3DTest(test.TestCase): self.assertAllClose(expected, value.flatten(), atol=tol, rtol=tol) + def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes, + stride, dilation, padding, data_format, + use_gpu): + total_size_tensor = 1 + total_size_filter = 1 + for s in tensor_in_sizes: + total_size_tensor *= s + for s in filter_in_sizes: + total_size_filter *= s + + # Initializes the input tensor with array containing incrementing + # numbers from 1. 
+ x1 = [f * 1.0 for f in range(1, total_size_tensor + 1)] + x2 = [f * 1.0 for f in range(1, total_size_filter + 1)] + with self.test_session(use_gpu=use_gpu): + t1 = constant_op.constant(x1, shape=tensor_in_sizes) + t2 = constant_op.constant(x2, shape=filter_in_sizes) + if isinstance(stride, collections.Iterable): + strides = list(stride) + else: + strides = [stride, stride, stride] + if data_format == "NCDHW": + t1 = test_util.NHWCToNCHW(t1) + full_strides = [1, 1] + strides + full_dilation = [1, 1] + dilation + else: + full_strides = [1] + strides + [1] + full_dilation = [1] + dilation + [1] + expected = nn_ops.convolution( + t1, + t2, + padding=padding, + strides=strides, + dilation_rate=dilation, + data_format=data_format) + computed = nn_ops.conv3d( + t1, + t2, + strides=full_strides, + dilations=full_dilation, + padding=padding, + data_format=data_format) + if data_format == "NCDHW": + expected = test_util.NCHWToNHWC(expected) + computed = test_util.NCHWToNHWC(computed) + return expected, computed + + def _VerifyDilatedConvValues(self, tensor_in_sizes, filter_in_sizes, stride, + padding, dilations): + expected_results = [] + computed_results = [] + default_dilations = ( + dilations[0] == 1 and dilations[1] == 1 and dilations[2] == 1) + for data_format, use_gpu in GetTestConfigs(): + # If any dilation rate is larger than 1, only do test on the GPU + # because we currently do not have a CPU implementation for arbitrary + # dilation rates. 
+ if default_dilations or use_gpu: + expected, computed = self._ComputeReferenceDilatedConv( + tensor_in_sizes, filter_in_sizes, stride, dilations, padding, + data_format, use_gpu) + expected_results.append(expected) + computed_results.append(computed) + tolerance = 1e-2 if use_gpu else 1e-5 + with self.test_session() as sess: + expected_values = sess.run(expected_results) + computed_values = sess.run(computed_results) + for e_value, c_value in zip(expected_values, computed_values): + print("expected = ", e_value) + print("actual = ", c_value) + self.assertAllClose( + e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-6) + def testConv3D1x1x1Filter(self): expected_output = [ 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5, 0.59259259, @@ -145,6 +219,15 @@ class Conv3DTest(test.TestCase): padding="VALID", expected=expected_output) + def testConv3D1x1x1Filter2x1x1Dilation(self): + if test.is_gpu_available(cuda_only=True): + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 3, 6, 1, 1], + filter_in_sizes=[1, 1, 1, 1, 1], + stride=1, + padding="VALID", + dilations=[2, 1, 1]) + # Expected values computed using scipy's correlate function. 
def testConv3D2x2x2Filter(self): expected_output = [ @@ -161,6 +244,15 @@ class Conv3DTest(test.TestCase): padding="VALID", expected=expected_output) + def testConv3D2x2x2Filter1x2x1Dilation(self): + if test.is_gpu_available(cuda_only=True): + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 4, 6, 3, 1], + filter_in_sizes=[2, 2, 2, 1, 1], + stride=1, + padding="VALID", + dilations=[1, 2, 1]) + def testConv3DStrides(self): expected_output = [ 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, 0.13988095, @@ -546,6 +638,98 @@ class Conv3DTest(test.TestCase): padding="SAME", test_input=False) + # Testing for backprops + def _RunAndVerifyBackprop(self, input_sizes, filter_sizes, output_sizes, + strides, dilations, padding, data_format, use_gpu, + err, mode): + total_input_size = 1 + total_filter_size = 1 + for s in input_sizes: + total_input_size *= s + for s in filter_sizes: + total_filter_size *= s + # Initializes the input tensor with array containing incrementing + # numbers from 1. + x1 = [f * 1.0 for f in range(1, total_input_size + 1)] + x2 = [f * 1.0 for f in range(1, total_filter_size + 1)] + default_dilations = ( + dilations[0] == 1 and dilations[1] == 1 and dilations[2] == 1) + + # If any dilation rate is larger than 1, only do test on the GPU + # because we currently do not have a CPU implementation for arbitrary + # dilation rates. 
+ if default_dilations or use_gpu: + with self.test_session(use_gpu=use_gpu) as sess: + if data_format == "NCDHW": + input_sizes = test_util.NHWCToNCHW(input_sizes) + t1 = constant_op.constant(x1, shape=input_sizes) + t2 = constant_op.constant(x2, shape=filter_sizes) + full_strides = [1] + strides + [1] + full_dilations = [1] + dilations + [1] + if data_format == "NCDHW": + full_strides = test_util.NHWCToNCHW(full_strides) + full_dilations = test_util.NHWCToNCHW(full_dilations) + actual = nn_ops.conv3d( + t1, + t2, + strides=full_strides, + dilations=full_dilations, + padding=padding, + data_format=data_format) + expected = nn_ops.convolution( + t1, + t2, + padding=padding, + strides=strides, + dilation_rate=dilations, + data_format=data_format) + if data_format == "NCDHW": + actual = test_util.NCHWToNHWC(actual) + expected = test_util.NCHWToNHWC(expected) + actual_grad = gradients_impl.gradients(actual, t1 + if mode == "input" else t2)[0] + expected_grad = gradients_impl.gradients(expected, t1 + if mode == "input" else t2)[0] + # "values" consists of two tensors for two backprops + actual_value = sess.run(actual_grad) + expected_value = sess.run(expected_grad) + self.assertShapeEqual(actual_value, actual_grad) + self.assertShapeEqual(expected_value, expected_grad) + print("expected = ", expected_value) + print("actual = ", actual_value) + self.assertArrayNear(expected_value.flatten(), actual_value.flatten(), + err) + + def testConv3D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): + if test.is_gpu_available(cuda_only=True): + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackprop( + input_sizes=[1, 3, 6, 1, 1], + filter_sizes=[2, 2, 1, 1, 1], + output_sizes=[1, 1, 5, 1, 1], + strides=[1, 1, 1], + dilations=[2, 1, 1], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5, + mode="filter") + + def testConv3D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self): + if test.is_gpu_available(cuda_only=True): + for 
(data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackprop( + input_sizes=[1, 3, 6, 1, 1], + filter_sizes=[2, 2, 1, 1, 1], + output_sizes=[1, 1, 5, 1, 1], + strides=[1, 1, 1], + dilations=[2, 1, 1], + padding="VALID", + data_format=data_format, + use_gpu=use_gpu, + err=1e-5, + mode="input") + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 4af5bd26dd..3a41391340 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -94,6 +94,7 @@ def _Conv3DGrad(op, grad): array_ops.shape(op.inputs[0]), op.inputs[1], grad, + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format), @@ -101,6 +102,7 @@ def _Conv3DGrad(op, grad): op.inputs[0], array_ops.shape(op.inputs[1]), grad, + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format) @@ -116,12 +118,14 @@ def _Conv3DBackpropInputGrad(op, grad): grad, array_ops.shape(op.inputs[1]), op.inputs[2], + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format), nn_ops.conv3d( grad, op.inputs[1], + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format) @@ -136,12 +140,14 @@ def _Conv3DBackpropFilterGrad(op, grad): array_ops.shape(op.inputs[0]), grad, op.inputs[2], + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format), None, nn_ops.conv3d( op.inputs[0], grad, + dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=data_format) -- GitLab From b23e91d247368f2046dae035b5c7bdda56512077 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 20 Apr 2018 12:37:39 -0700 Subject: [PATCH 088/434] Changed tf_to_tflite build rule. PiperOrigin-RevId: 193707628 --- tensorflow/contrib/lite/build_def.bzl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index b8f6b7fd59..8521677682 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -124,19 +124,19 @@ def tf_to_tflite(name, src, options, out): out: name of the output flatbuffer file. """ - toco = "//tensorflow/contrib/lite/toco:toco" + toco_cmdline = " ".join([ + "//tensorflow/contrib/lite/toco:toco", + "--input_format=TENSORFLOW_GRAPHDEF", + "--output_format=TFLITE", + ("--input_file=$(location %s)" % src), + ("--output_file=$(location %s)" % out), + ] + options ) native.genrule( name = name, - srcs=[src, options], + srcs=[src], outs=[out], - cmd = ("$(location %s) " + - " --input_file=$(location %s) " + - " --output_file=$(location %s) " + - " --input_format=TENSORFLOW_GRAPHDEF" + - " --output_format=TFLITE" + - " `cat $(location %s)`") - % (toco, src, out, options), - tools= [toco], + cmd = toco_cmdline, + tools= ["//tensorflow/contrib/lite/toco:toco"], ) def tflite_to_json(name, src, out): -- GitLab From 517d1912f4ec71180944320350a3694332a1dedc Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 20 Apr 2018 12:40:57 -0700 Subject: [PATCH 089/434] Add a utility to visualize object-based checkpoints Useful for generating a warm fuzzy feeling that everything you think should be saved was saved, and for explaining what object-based checkpointing is. (Also useful on the former front will be a planned "assert that all of this Graph's trainable variables are accessible from object X" function.) Somewhat hacky since it generates strings rather than using the pydot bindings (and so works without a pydot dependency). 
PiperOrigin-RevId: 193708003 --- tensorflow/contrib/BUILD | 1 + tensorflow/contrib/checkpoint/__init__.py | 3 + tensorflow/contrib/checkpoint/python/BUILD | 32 +++++ .../contrib/checkpoint/python/visualize.py | 111 ++++++++++++++++++ .../checkpoint/python/visualize_test.py | 97 +++++++++++++++ 5 files changed, 244 insertions(+) create mode 100644 tensorflow/contrib/checkpoint/python/visualize.py create mode 100644 tensorflow/contrib/checkpoint/python/visualize_test.py diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 7e47516550..d28392a62c 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -25,6 +25,7 @@ py_library( "//tensorflow/contrib/batching:batch_py", "//tensorflow/contrib/bayesflow:bayesflow_py", "//tensorflow/contrib/boosted_trees:init_py", + "//tensorflow/contrib/checkpoint/python:checkpoint", "//tensorflow/contrib/cloud:cloud_py", "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", "//tensorflow/contrib/cluster_resolver:cluster_resolver_py", diff --git a/tensorflow/contrib/checkpoint/__init__.py b/tensorflow/contrib/checkpoint/__init__.py index 70d7d2d8d7..1192cc44a1 100644 --- a/tensorflow/contrib/checkpoint/__init__.py +++ b/tensorflow/contrib/checkpoint/__init__.py @@ -16,6 +16,7 @@ For creating and managing dependencies: +@@dot_graph_from_checkpoint @@split_dependency """ @@ -24,6 +25,8 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.checkpoint.python.split_dependency import split_dependency +from tensorflow.contrib.checkpoint.python.visualize import dot_graph_from_checkpoint + from tensorflow.python.util.all_util import remove_undocumented remove_undocumented(module_name=__name__) diff --git a/tensorflow/contrib/checkpoint/python/BUILD b/tensorflow/contrib/checkpoint/python/BUILD index d57b01aab2..a5681ffa61 100644 --- a/tensorflow/contrib/checkpoint/python/BUILD +++ b/tensorflow/contrib/checkpoint/python/BUILD @@ -4,6 +4,15 @@ 
package(default_visibility = ["//tensorflow:internal"]) load("//tensorflow:tensorflow.bzl", "py_test") +py_library( + name = "checkpoint", + srcs_version = "PY2AND3", + deps = [ + ":split_dependency", + ":visualize", + ], +) + py_library( name = "split_dependency", srcs = ["split_dependency.py"], @@ -27,3 +36,26 @@ py_test( "//tensorflow/python/eager:test", ], ) + +py_library( + name = "visualize", + srcs = ["visualize.py"], + srcs_version = "PY2AND3", + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/python:pywrap_tensorflow", + ], +) + +py_test( + name = "visualize_test", + srcs = ["visualize_test.py"], + deps = [ + ":visualize", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:training", + "//tensorflow/python/eager:test", + ], +) diff --git a/tensorflow/contrib/checkpoint/python/visualize.py b/tensorflow/contrib/checkpoint/python/visualize.py new file mode 100644 index 0000000000..86fbdb41d2 --- /dev/null +++ b/tensorflow/contrib/checkpoint/python/visualize.py @@ -0,0 +1,111 @@ +"""Utilities for visualizing dependency graphs.""" +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.core.protobuf import checkpointable_object_graph_pb2 +from tensorflow.python import pywrap_tensorflow +from tensorflow.python.framework import errors_impl +from tensorflow.python.training import checkpointable + + +def dot_graph_from_checkpoint(save_path): + r"""Visualizes an object-based checkpoint (from `tf.train.Checkpoint`). + + Useful for inspecting checkpoints and debugging loading issues. + + Example usage from Python (requires pydot): + ```python + import tensorflow as tf + import pydot + + dot_string = tf.contrib.checkpoint.dot_graph_from_checkpoint('/path/to/ckpt') + parsed, = pydot.graph_from_dot_data(dot_string) + parsed.write_svg('/tmp/tensorflow/visualized_checkpoint.svg') + ``` + + Example command line usage: + ```sh + python -c "import tensorflow as tf;\ + print(tf.contrib.checkpoint.dot_graph_from_checkpoint('/path/to/ckpt'))"\ + | dot -Tsvg > /tmp/tensorflow/checkpoint_viz.svg + ``` + + Args: + save_path: The checkpoint prefix, as returned by `tf.train.Checkpoint.save` + or `tf.train.latest_checkpoint`. + Returns: + A graph in DOT format as a string. + """ + reader = pywrap_tensorflow.NewCheckpointReader(save_path) + try: + object_graph_string = reader.get_tensor( + checkpointable.OBJECT_GRAPH_PROTO_KEY) + except errors_impl.NotFoundError: + raise ValueError( + ('The specified checkpoint "%s" does not appear to be object-based (it ' + 'is missing the key "%s"). 
Likely it was created with a name-based ' + 'saver and does not contain an object dependency graph.') % ( + save_path, checkpointable.OBJECT_GRAPH_PROTO_KEY)) + shape_map = reader.get_variable_to_shape_map() + dtype_map = reader.get_variable_to_dtype_map() + object_graph = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph()) + object_graph.ParseFromString(object_graph_string) + graph = 'digraph {\n' + def _escape(name): + return name.replace('"', '\\"') + slot_ids = set() + for node in object_graph.nodes: + for slot_reference in node.slot_variables: + slot_ids.add(slot_reference.slot_variable_node_id) + for node_id, node in enumerate(object_graph.nodes): + if (len(node.attributes) == 1 + and node.attributes[0].name == checkpointable.VARIABLE_VALUE_KEY): + if node_id in slot_ids: + color = 'orange' + tooltip_prefix = 'Slot variable' + else: + color = 'blue' + tooltip_prefix = 'Variable' + attribute = node.attributes[0] + graph += ('N_%d [shape=point label="" color=%s width=.25' + ' tooltip="%s %s shape=%s %s"]\n') % ( + node_id, + color, + tooltip_prefix, + _escape(attribute.full_name), + shape_map[attribute.checkpoint_key], + dtype_map[attribute.checkpoint_key].name) + elif node.slot_variables: + graph += ('N_%d [shape=point label="" width=.25 color=red,' + 'tooltip="Optimizer"]\n') % node_id + else: + graph += 'N_%d [shape=point label="" width=.25]\n' % node_id + for reference in node.children: + graph += 'N_%d -> N_%d [label="%s"]\n' % ( + node_id, reference.node_id, _escape(reference.local_name)) + for slot_reference in node.slot_variables: + graph += 'N_%d -> N_%d [label="%s" style=dotted]\n' % ( + node_id, + slot_reference.slot_variable_node_id, + _escape(slot_reference.slot_name)) + graph += 'N_%d -> N_%d [style=dotted]\n' % ( + slot_reference.original_variable_node_id, + slot_reference.slot_variable_node_id) + graph += '}\n' + return graph diff --git a/tensorflow/contrib/checkpoint/python/visualize_test.py 
b/tensorflow/contrib/checkpoint/python/visualize_test.py new file mode 100644 index 0000000000..1d9ab78923 --- /dev/null +++ b/tensorflow/contrib/checkpoint/python/visualize_test.py @@ -0,0 +1,97 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import os + +from tensorflow.contrib.checkpoint.python import visualize + +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.keras._impl.keras.engine import training +from tensorflow.python.keras._impl.keras.layers import core +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.training import adam +from tensorflow.python.training import checkpointable_utils + +try: + import pydot # pylint: disable=g-import-not-at-top +except ImportError: + pydot = None + + +class MyModel(training.Model): + """A concrete Model for testing.""" + + def __init__(self): + super(MyModel, self).__init__() + self._named_dense = core.Dense(1, use_bias=True) + self._second = core.Dense(1, use_bias=False) + + def call(self, values): + ret = self._second(self._named_dense(values)) + return ret + + +class DotGraphTests(test.TestCase): + + def 
testMakeDotGraph(self): + with context.eager_mode(): + input_value = constant_op.constant([[3.]]) + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + optimizer_step = resource_variable_ops.ResourceVariable(12) + save_checkpoint = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model, optimizer_step=optimizer_step) + optimizer.minimize(functools.partial(model, input_value)) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt') + save_path = save_checkpoint.save(checkpoint_prefix) + prefix = save_checkpoint.save(save_path) + + dot_graph_string = visualize.dot_graph_from_checkpoint(prefix) + + # The remainder of this test is more-or-less optional since it's so + # dependent on pydot/platform/Python versions. + if pydot is None: + self.skipTest('pydot is required for the remainder of this test.') + try: + parsed, = pydot.graph_from_dot_data(dot_graph_string) + except NameError as e: + if "name 'dot_parser' is not defined" in str(e): + self.skipTest("pydot isn't working") + else: + raise + # Check that the graph isn't completely trivial + self.assertEqual( + '"model"', + parsed.obj_dict['edges'][('N_0', 'N_1')][0]['attributes']['label']) + image_path = os.path.join(self.get_temp_dir(), 'saved.svg') + try: + parsed.write_svg(image_path) + except Exception as e: # pylint: disable=broad-except + # For some reason PyDot's "dot not available" error is an Exception, not + # something more specific. + if '"dot" not found in path' in str(e): + self.skipTest("pydot won't save SVGs (dot not available)") + else: + raise + +if __name__ == '__main__': + test.main() -- GitLab From 0b6ca72332735fe460da23fbcca5c8c24d838f28 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 13:18:02 -0700 Subject: [PATCH 090/434] Update ops-related pbtxt files. 
PiperOrigin-RevId: 193712839 --- .../core/ops/compat/ops_history.v1.pbtxt | 124 ++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 26 ++++ 2 files changed, 150 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index dbd6f859c4..247f9edf5b 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -13445,6 +13445,68 @@ op { version: 10 } } +op { + name: "Conv3DBackpropFilter" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "filter" + type_attr: "T" + } + input_arg { + name: "out_backprop" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 5 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "dilations" + type: "list(int)" + default_value { + list { + i: 1 + i: 1 + i: 1 + i: 1 + i: 1 + } + } + } + deprecation { + version: 10 + } +} op { name: "Conv3DBackpropFilterV2" input_arg { @@ -13718,6 +13780,68 @@ op { version: 10 } } +op { + name: "Conv3DBackpropInput" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "filter" + type_attr: "T" + } + input_arg { + name: "out_backprop" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 5 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "dilations" + type: "list(int)" + default_value { + list { + i: 1 + i: 1 + i: 1 + i: 1 + i: 1 + } + } + } + 
deprecation { + version: 10 + } +} op { name: "Conv3DBackpropInputV2" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 46afe357f0..d1773daebe 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -5651,6 +5651,19 @@ op { } } } + attr { + name: "dilations" + type: "list(int)" + default_value { + list { + i: 1 + i: 1 + i: 1 + i: 1 + i: 1 + } + } + } deprecation { version: 10 explanation: "Use Conv3DBackpropFilterV2" @@ -5774,6 +5787,19 @@ op { } } } + attr { + name: "dilations" + type: "list(int)" + default_value { + list { + i: 1 + i: 1 + i: 1 + i: 1 + i: 1 + } + } + } deprecation { version: 10 explanation: "Use Conv3DBackpropInputV2" -- GitLab From 99167d3a6393ac47c2e01b6f620a03adeb9ac3e4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 13:48:37 -0700 Subject: [PATCH 091/434] Merged commit includes the following changes: 193717076 by yifeif: Automated g4 rollback of changelist 193713153. -- 193716750 by fchollet: Refactor `tf.keras.layers.Embedding` layer to use `embedding_lookup` instead of `gather`. This makes the layer TPU-compatible. -- 193716664 by A. Unique TensorFlower: Go: Update generated wrapper functions for TensorFlow ops. -- 193713153 by power: Experimental Keras TPU compatibility layer. -- PiperOrigin-RevId: 193717076 --- tensorflow/go/op/wrappers.go | 32 +++++++++++++++++-- tensorflow/python/keras/BUILD | 1 + .../keras/_impl/keras/layers/embeddings.py | 4 +-- .../_impl/keras/layers/embeddings_test.py | 13 ++++++++ 4 files changed, 46 insertions(+), 4 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 3b3dff0573..ec7d9dcc4f 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -5917,6 +5917,17 @@ func Sqrt(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } +// Conv3DBackpropFilterAttr is an optional argument to Conv3DBackpropFilter. 
+type Conv3DBackpropFilterAttr func(optionalAttr) + +// Conv3DBackpropFilterDilations sets the optional dilations attribute to value. +// If not specified, defaults to +func Conv3DBackpropFilterDilations(value []int64) Conv3DBackpropFilterAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + // Computes the gradients of 3-D convolution with respect to the filter. // // DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2 @@ -5930,11 +5941,14 @@ func Sqrt(scope *Scope, x tf.Output) (y tf.Output) { // strides: 1-D tensor of length 5. The stride of the sliding window for each // dimension of `input`. Must have `strides[0] = strides[4] = 1`. // padding: The type of padding algorithm to use. -func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) { +func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterAttr) (output tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ Type: "Conv3DBackpropFilter", Input: []tf.Input{ @@ -12306,6 +12320,17 @@ func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, pa return op.Output(0) } +// Conv3DBackpropInputAttr is an optional argument to Conv3DBackpropInput. +type Conv3DBackpropInputAttr func(optionalAttr) + +// Conv3DBackpropInputDilations sets the optional dilations attribute to value. +// If not specified, defaults to +func Conv3DBackpropInputDilations(value []int64) Conv3DBackpropInputAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + // Computes the gradients of 3-D convolution with respect to the input. 
// // DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2 @@ -12319,11 +12344,14 @@ func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, pa // strides: 1-D tensor of length 5. The stride of the sliding window for each // dimension of `input`. Must have `strides[0] = strides[4] = 1`. // padding: The type of padding algorithm to use. -func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) { +func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputAttr) (output tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ Type: "Conv3DBackpropInput", Input: []tf.Input{ diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 70040b7e74..1c58553156 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -208,6 +208,7 @@ py_library( "//tensorflow/python:array_ops", "//tensorflow/python:distribute", "//tensorflow/python:dtypes", + "//tensorflow/python:embedding_ops", "//tensorflow/python:framework_ops", "//tensorflow/python:logging_ops", "//tensorflow/python:math_ops", diff --git a/tensorflow/python/keras/_impl/keras/layers/embeddings.py b/tensorflow/python/keras/_impl/keras/layers/embeddings.py index 591bab7cd8..07b8726b85 100644 --- a/tensorflow/python/keras/_impl/keras/layers/embeddings.py +++ b/tensorflow/python/keras/_impl/keras/layers/embeddings.py @@ -24,7 +24,7 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import Layer from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion -from tensorflow.python.ops 
import array_ops +from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -155,7 +155,7 @@ class Embedding(Layer): def call(self, inputs): if K.dtype(inputs) != 'int32': inputs = math_ops.cast(inputs, 'int32') - out = array_ops.gather(self.embeddings, inputs) + out = embedding_ops.embedding_lookup(self.embeddings, inputs) return out def get_config(self): diff --git a/tensorflow/python/keras/_impl/keras/layers/embeddings_test.py b/tensorflow/python/keras/_impl/keras/layers/embeddings_test.py index 9f6793eac8..6ebf5dc94a 100644 --- a/tensorflow/python/keras/_impl/keras/layers/embeddings_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/embeddings_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils @@ -65,6 +67,17 @@ class EmbeddingTest(test.TestCase): input_dtype='int32', expected_output_dtype='float32') + def test_embedding_correctness(self): + with self.test_session(): + layer = keras.layers.Embedding(output_dim=2, input_dim=2) + layer.build((None, 2)) + matrix = np.array([[1, 1], [2, 2]]) + layer.set_weights([matrix]) + + inputs = keras.backend.constant([[0, 1, 0]], dtype='int32') + outputs = keras.backend.eval(layer(inputs)) + self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]]) + if __name__ == '__main__': test.main() -- GitLab From 5a4356be6822dfe0b0f973852b9b65d69e4c169c Mon Sep 17 00:00:00 2001 From: Brian Patton Date: Fri, 20 Apr 2018 13:54:00 -0700 Subject: [PATCH 092/434] Fix for: Suggest braces around initialization of subobject. 
PiperOrigin-RevId: 193717872 --- tensorflow/python/lib/core/bfloat16.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/lib/core/bfloat16.cc b/tensorflow/python/lib/core/bfloat16.cc index 7f07deebef..77fa2c1f66 100644 --- a/tensorflow/python/lib/core/bfloat16.cc +++ b/tensorflow/python/lib/core/bfloat16.cc @@ -616,8 +616,8 @@ bool Initialize() { }; // Comparisons - const std::array compare_types = {npy_bfloat16_, npy_bfloat16_, - NPY_BOOL}; + const std::array compare_types = { + {npy_bfloat16_, npy_bfloat16_, NPY_BOOL}}; if (!register_ufunc("equal", CompareUFunc, compare_types)) { -- GitLab From 1cd64d57143814fc0652c09165735be62d96124f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 13:56:55 -0700 Subject: [PATCH 093/434] Track dependencies between outside_compilation clusters so that control edges can be correctly added to sequence compiled computations. PiperOrigin-RevId: 193718295 --- .../jit/encapsulate_subgraphs_pass.cc | 378 ++++++++++- .../jit/encapsulate_subgraphs_pass_test.cc | 590 +++++++++++++++++- tensorflow/compiler/tf2xla/xla_compiler.cc | 25 + tensorflow/compiler/tf2xla/xla_compiler.h | 20 + 4 files changed, 1005 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index 9465385b58..7507e193b5 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include #include "tensorflow/compiler/jit/graph_to_functiondef.h" +#include "tensorflow/compiler/jit/graphcycles/graphcycles.h" #include "tensorflow/compiler/jit/legacy_flags/encapsulate_subgraphs_pass_flags.h" #include "tensorflow/compiler/jit/mark_for_compilation_pass.h" #include "tensorflow/compiler/jit/shape_inference_helpers.h" @@ -160,6 +161,11 @@ class Encapsulator { std::move(outside_compilation_attribute)), graph_in_(graph_in) {} + // Find dependencies between subgraphs and outside_compilation clusters that + // only manifest via edges between outside_compilation clusters in the outer + // (non-compiled) graph. + Status FindClusterDependencies(); + // Find subgraphs marked with 'group_attribute', and build a new // subgraph, one for each value of 'group_attribute'. Status SplitIntoSubgraphs(); @@ -230,6 +236,19 @@ class Encapsulator { // the shapes of any ancestor RAH outputs. If it can be determined that the // shape of the SFH inputs will not be inferrable even once the shapes of the // RAH outputs are known, an error is returned by the rewriter. + // + // Once edges between compiled and outside_compilation clusters have been + // replaced by send/recv ops, some dependencies may no longer be apparent. + // A clustering pass finds all the dependencies between HC nodes that are only + // present as a result of edges between nodes in outside_compilaton clusters. + // Suppose there is a path from outside_compilation cluster C in subgraph S + // to outside_compilation cluster D in subgraph T. If S != T then a control + // edge is added from the call node for S to the call node for T, which + // ensures that C will execute before D because S executes before T. 
If S==T + // then a control dependency is added between the HC nodes for C and D in S, + // and the HC node for C is added to an 'ancestors' attr in the HC node for D + // so that during compilation of the HC node for D, an XLA control dependency + // can be added to ensure C's SendToHost executes before D's RecvFromHost. class Subgraph { public: // Creates a graph to build the subgraph in, if it doesn't already exist, @@ -324,6 +343,18 @@ class Encapsulator { void RecordOutsideCompilationOutputOrControl( const string& outside_compilation_id, const Edge* edge); + // Records the fact that there is a path from a node in outside_compilation + // cluster ancestor to node in cluster successor that does not go through + // the subgraph. + void RecordOutsideCompilationDependency(const string& successor, + const string& ancestor); + + // Returns the mapping from outside_compilation cluster C to the set of + // outside_compilation clusters that have a path to C entirely outside + // compiled subgraphs. + const std::unordered_map> + OutsideCompilationAncestorMap() const; + // Adds the HostCompute nodes for each outside_compilation subgraph. Status AddHostComputes( const string& subgraph_name, @@ -406,6 +437,13 @@ class Encapsulator { Status AddHostComputeKeyPlaceholder(OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out); + // Get the set of outside_compilation clusters and the dependency edges + // between them. + void GetActiveClusterDependencyGraph( + std::unordered_set* clusters, + std::unordered_set* has_successor, + std::unordered_map>* ancestors_map); + // Builds a _RecvAtHost node producing all the inputs of an // outside_compilation subgraph and stores it in oc_subgraph.recv_at_host. Status AddRecvAtHostNode(const string& group_attribute, @@ -468,6 +506,14 @@ class Encapsulator { // The outside_compilation clusters in this subgraph. 
std::unordered_map outside_compilation_subgraphs_; + // For each outside_compilation cluster C, the outside_compilation clusters + // that have a path to C outside the compiled graph. + std::unordered_map> + outside_compilation_ancestors_; + // For each outside_compilation cluster C, the outside_compilation clusters + // that have a path from C outside the compiled graph. + std::unordered_map> + outside_compilation_successors_; // NoOp node in the output graph that is sequenced after the call node and // used to prevent host-side outside_compilation sends and recvs from being @@ -556,6 +602,10 @@ class Encapsulator { std::unordered_set, NodeSlot::PairHasher>* edges_added); + // Adds control dependencies between subgraph call nodes that have + // dependencies via outside_compilation edges. + Status AddCallNodeDependencies(Graph* graph_out); + // Adds all edges to the output graph. Status AddEdgesToOutputGraph( const std::unordered_map& node_images, @@ -620,10 +670,65 @@ class Encapsulator { const Graph* graph_in_; std::unordered_map subgraphs_; + // For each subgraph S the subgraphs S' such that there is a path in some + // outside_compilation cluster C in S to some outside_compilation cluster C' + // in S', that goes only through the uncompiled graph. + std::unordered_map> subgraph_ancestors_; TF_DISALLOW_COPY_AND_ASSIGN(Encapsulator); }; +namespace { + +// Return in 'sorted' a topological sort of clusters according to the +// dependencies encoded in ancestors. clusters is the list of all clusters +// including clusters that are not present in the ancestors map. has_successors +// is the set of clusters that are ancestors of some other cluster. +void TopologicalClusterSort( + const std::unordered_set& clusters, + const std::unordered_set& has_successors, + const std::unordered_map>& ancestors, + std::vector* sorted) { + // The nodes are placed in 'sorted' in topological order. 
+ sorted->clear(); + // We don't use the standard DFS because we are not operating on Node* + // objects. + struct Work { + string cluster; + bool leave; + }; + std::set visited; + std::vector stack; + // Seed the processing list with clusters that have no successors. + for (const auto& cluster : clusters) { + if (has_successors.find(cluster) == has_successors.end()) { + stack.push_back({cluster, false}); + } + } + while (!stack.empty()) { + const Work item = stack.back(); + stack.pop_back(); + if (item.leave) { + sorted->push_back(item.cluster); + continue; + } + + if (visited.find(item.cluster) != visited.end()) continue; + visited.insert(item.cluster); + + stack.push_back({item.cluster, true}); + const auto& iter = ancestors.find(item.cluster); + if (iter != ancestors.end()) { + for (const auto& ancestor : iter->second) { + stack.push_back({ancestor, false}); + } + } + } + CHECK(sorted->size() == clusters.size()); +} + +} // namespace + Node* Encapsulator::Subgraph::GetCallNodeForInputs() const { return call_node_inputs_; } @@ -786,12 +891,71 @@ void Encapsulator::Subgraph::RecordOutsideCompilationOutputOrControl( } } +void Encapsulator::Subgraph::RecordOutsideCompilationDependency( + const string& successor, const string& ancestor) { + outside_compilation_ancestors_[successor].insert(ancestor); + outside_compilation_successors_[ancestor].insert(successor); +} + +const std::unordered_map> +Encapsulator::Subgraph::OutsideCompilationAncestorMap() const { + return outside_compilation_ancestors_; +} + +void Encapsulator::Subgraph::GetActiveClusterDependencyGraph( + std::unordered_set* clusters, + std::unordered_set* has_successor, + std::unordered_map>* ancestors_map) { + // During initial clustering the ancestor and successor datastructures may + // have been built including oc_cluster names that never turned into subgraphs + // because they had no edges into or out of the compiled cluster. Remove them + // before proceeding to simplify the logic. 
Get the set of clusters that was + // actually added, then remove references to the others. + for (const auto& oc_subgraph : outside_compilation_subgraphs_) { + clusters->insert(oc_subgraph.first); + } + for (const auto& cluster : outside_compilation_successors_) { + if (clusters->find(cluster.first) != clusters->end()) { + for (const auto& successor : cluster.second) { + if (clusters->find(successor) != clusters->end()) { + has_successor->insert(cluster.first); + break; + } + } + } + } + for (const auto& cluster : outside_compilation_ancestors_) { + if (clusters->find(cluster.first) != clusters->end()) { + std::unordered_set& ancestors = (*ancestors_map)[cluster.first]; + for (const auto& ancestor : cluster.second) { + if (clusters->find(ancestor) != clusters->end()) { + ancestors.insert(ancestor); + } + } + } + } +} + Status Encapsulator::Subgraph::AddHostComputes( const string& subgraph_name, const std::unordered_map& node_images) { - for (auto& oc_subgraph_iter : outside_compilation_subgraphs_) { - const string& oc_subgraph_name = oc_subgraph_iter.first; - OutsideCompilationSubgraph& oc_subgraph = oc_subgraph_iter.second; + // Get the set of outside_compilation clusters and the dependency edges + // between them. + std::unordered_set clusters; + std::unordered_set has_successor; + std::unordered_map> ancestors_map; + GetActiveClusterDependencyGraph(&clusters, &has_successor, &ancestors_map); + // Topologically sort the outside_compilation clusters according to their + // dependency relation. 
+ std::vector sorted_clusters; + TopologicalClusterSort(clusters, has_successor, ancestors_map, + &sorted_clusters); + + // The host compute nodes added for each outside_compilation_cluster; + std::unordered_map host_compute_node; + for (const string& oc_subgraph_name : sorted_clusters) { + OutsideCompilationSubgraph& oc_subgraph = + outside_compilation_subgraphs_[oc_subgraph_name]; if (!oc_subgraph.inputs.empty() || !oc_subgraph.control_inputs.empty() || !oc_subgraph.outputs_by_src.empty() || !oc_subgraph.control_outputs.empty()) { @@ -811,13 +975,22 @@ Status Encapsulator::Subgraph::AddHostComputes( inputs[input_index].Reset(src_image->name(), src_slot, dtype); input_dtypes[input_index] = dtype; } - for (const auto& output : oc_subgraph.outputs_by_src) { DataType dtype = output.first.dtype; int output_index = output.second; output_dtypes[output_index] = dtype; } + std::vector host_compute_ancestors; + const auto iter = ancestors_map.find(oc_subgraph_name); + if (iter != ancestors_map.end()) { + for (const string& ancestor_cluster : iter->second) { + host_compute_ancestors.push_back( + outside_compilation_subgraphs_[ancestor_cluster] + .host_compute_name); + } + } + NodeDef host_compute_def; NodeDefBuilder builder(strings::StrCat("outside_compilation_", oc_subgraph_name, "_host_compute"), @@ -825,6 +998,7 @@ Status Encapsulator::Subgraph::AddHostComputes( builder.Input(inputs); builder.Attr("Tinputs", input_dtypes); builder.Attr("Toutputs", output_dtypes); + builder.Attr("ancestors", host_compute_ancestors); builder.Attr("key", strings::StrCat("host_compute_channel_", subgraph_name, "_", oc_subgraph_name)); @@ -834,6 +1008,7 @@ Status Encapsulator::Subgraph::AddHostComputes( Node* host_compute = graph_->AddNode(host_compute_def, &s); if (!s.ok()) return s; + host_compute_node[host_compute->name()] = host_compute; oc_subgraph.host_compute_name = host_compute->name(); // Connect the _HostCompute node to its producers in the subgraph. 
@@ -852,6 +1027,12 @@ Status Encapsulator::Subgraph::AddHostComputes( graph_->AddControlEdge(src_image, host_compute); } + // Connect the _HostCompute node to its ancestor host compute nodes. + for (const auto& ancestor_name : host_compute_ancestors) { + Node* ancestor = host_compute_node[ancestor_name]; + graph_->AddControlEdge(ancestor, host_compute); + } + // Connect the consumers in the subgraph to the _HostCompute node. for (const auto& output : oc_subgraph.outputs_by_dst) { const Node* dst_node = output.first.node; @@ -1654,6 +1835,17 @@ Status Encapsulator::CopyEdgeToOutputGraph( return Status::OK(); } +Status Encapsulator::AddCallNodeDependencies(Graph* graph_out) { + for (const auto& ancestors : subgraph_ancestors_) { + const string& subgraph = ancestors.first; + for (const string& ancestor : ancestors.second) { + graph_out->AddControlEdge(subgraphs_[ancestor].GetCallNodeForOutputs(), + subgraphs_[subgraph].GetCallNodeForInputs()); + } + } + return Status::OK(); +} + Status Encapsulator::AddEdgesToOutputGraph( const std::unordered_map& node_images, bool parallel_checking, Graph* graph_out) { @@ -1703,6 +1895,7 @@ Status Encapsulator::AddEdgesToOutputGraph( Subgraph& subgraph = subgraph_entry.second; subgraph.ConnectSequencerToCallNode(graph_out); } + TF_RETURN_IF_ERROR(AddCallNodeDependencies(graph_out)); return Status::OK(); } @@ -1960,6 +2153,182 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( return Status::OK(); } +namespace { + +// Helper struct for building cluster dependencies and also debugging cycles in +// the dependencies. While computing dependencies we construct a mapping from +// Node* to PathDetails. 
+struct PathDetails { + struct SubgraphAndCluster { + string subgraph; + string outside_compilation_cluster; + bool operator==(const SubgraphAndCluster& other) const { + return subgraph == other.subgraph && + outside_compilation_cluster == other.outside_compilation_cluster; + } + }; + + struct SubgraphAndClusterHash { + inline std::size_t operator()(const SubgraphAndCluster& v) const { + return hash()( + strings::StrCat(v.subgraph, v.outside_compilation_cluster)); + } + }; + + typedef std::unordered_set + SubgraphAndClusterSet; + + // Returns the set of (subgraph, oc_cluster) pairs that should be recorded as + // ancestors for any successor of this node. If the node is in the outer + // graph, it returns the transitive union of the ancestors of the node's + // inputs. If the node is in an outside_compilation cluster, it returns just + // that cluster. If the node is compiled, it returns the empty set. + SubgraphAndClusterSet AncestorsForSuccessor() { + if (subgraph.empty()) { + return ancestor_clusters; + } else if (outside_compilation_cluster.empty()) { + return SubgraphAndClusterSet(); + } else { + SubgraphAndCluster entry; + entry.subgraph = subgraph; + entry.outside_compilation_cluster = outside_compilation_cluster; + return SubgraphAndClusterSet({entry}); + } + } + + // The transitive union of the ancestor's of this node's inputs. This is only + // saved for debugging in order to print out enough information to debug a + // discovered cycle. + SubgraphAndClusterSet ancestor_clusters; + // The subgraph attr on this node. + string subgraph; + // The outside_compilation attr on this node. + string outside_compilation_cluster; +}; + +// Adds an edge from ancestor to successor to the cycle detector, and returns an +// error if that edge causes the formation of a cycle. In the error case, logs +// the contents of the node_ancestors_map to facilitate debugging. 
+Status CheckClusterDependencyForCycles( + const string& ancestor, const string& successor, + const std::unordered_map>& ancestors, + const std::unordered_map& node_ancestors_map, + GraphCycles* cycle_detector, std::map* cycle_detector_map) { + if (cycle_detector_map->find(ancestor) == cycle_detector_map->end()) { + (*cycle_detector_map)[ancestor] = cycle_detector->NewNode(); + } + if (cycle_detector_map->find(successor) == cycle_detector_map->end()) { + (*cycle_detector_map)[successor] = cycle_detector->NewNode(); + } + + if (!cycle_detector->InsertEdge((*cycle_detector_map)[ancestor], + (*cycle_detector_map)[successor])) { + LOG(ERROR) << "Cycle in outside_compilation clusters"; + for (const auto& cluster : ancestors) { + LOG(ERROR) << "Cluster " << cluster.first << " depends on:"; + for (const auto& ancestor : cluster.second) { + LOG(ERROR) << " " << ancestor; + } + } + for (const auto& node_ancestors : node_ancestors_map) { + LOG(ERROR) << "Node " << node_ancestors.first->name() << " (" + << node_ancestors.second.subgraph << ";" + << node_ancestors.second.outside_compilation_cluster + << ") has ancestor clusters:"; + for (const auto& ancestor : node_ancestors.second.ancestor_clusters) { + LOG(ERROR) << " " << ancestor.subgraph << ";" + << ancestor.outside_compilation_cluster; + } + } + return errors::InvalidArgument( + "Can't compile outside_compilation clusters because there is a " + "dependency cycle: see error log for details."); + } + return Status::OK(); +} + +} // namespace + +Status Encapsulator::FindClusterDependencies() { + // Map from nodes to ancestor details. A node is entered into the map if it is + // in a compilation subgraph, and outside_compilation cluster, or appears on a + // path in the outer graph leading from an outside_compilation subgraph. + std::unordered_map node_ancestors_map; + // We check that clusters are acyclic using this cycle detector. + GraphCycles cycle_detector; + // Map from cluster name to cycle detector node id. 
+ std::map cycle_detector_map; + // Process the nodes in topologically-sorted order. + std::vector nodes; + GetReversePostOrder(*graph_in_, &nodes); + for (Node* node : nodes) { + string subgraph_name; + string oc_cluster; + TF_RETURN_IF_ERROR(GetFunctionNameAttr(node, &subgraph_name, &oc_cluster)); + // First create an entry in the ancestors map if the node is in a compiled + // subgraph or outside_compilation cluster, or if any incoming edge is from + // a node with an ancestor map entry; and find the union of all the + // ancestors. + if (!subgraph_name.empty()) { + node_ancestors_map[node].subgraph = subgraph_name; + node_ancestors_map[node].outside_compilation_cluster = oc_cluster; + } + for (Node* src : node->in_nodes()) { + const auto iter = node_ancestors_map.find(src); + if (iter != node_ancestors_map.end()) { + const auto& ancestors_to_follow = iter->second.AncestorsForSuccessor(); + for (const auto& ancestor : ancestors_to_follow) { + if (ancestor.subgraph != subgraph_name || + ancestor.outside_compilation_cluster != oc_cluster) { + node_ancestors_map[node].ancestor_clusters.insert(ancestor); + } + } + } + } + if (!subgraph_name.empty()) { + // The node is in a compiled subgraph or an outside_compilation cluster. + if (oc_cluster.empty()) { + // The node is not in an outside_compilation cluster. Record the + // subgraph's ancestor dependencies. + for (const auto& cluster : node_ancestors_map[node].ancestor_clusters) { + if (cluster.subgraph != subgraph_name) { + subgraph_ancestors_[subgraph_name].insert(cluster.subgraph); + TF_RETURN_IF_ERROR(CheckClusterDependencyForCycles( + cluster.subgraph, subgraph_name, subgraph_ancestors_, + node_ancestors_map, &cycle_detector, &cycle_detector_map)); + } + } + } else { + Subgraph& subgraph = subgraphs_[subgraph_name]; + // The node is in an outside_compilation cluster. Record the cluster + // and/or subgraph ancestor dependencies. 
+ for (const auto& cluster : node_ancestors_map[node].ancestor_clusters) { + if (cluster.subgraph == subgraph_name) { + // The ancestor is in the same subgraph. + if (cluster.outside_compilation_cluster != oc_cluster) { + // But not in the same oc_cluster, so record the dependency. + subgraph.RecordOutsideCompilationDependency( + oc_cluster, cluster.outside_compilation_cluster); + TF_RETURN_IF_ERROR(CheckClusterDependencyForCycles( + cluster.outside_compilation_cluster, oc_cluster, + subgraph.OutsideCompilationAncestorMap(), node_ancestors_map, + &cycle_detector, &cycle_detector_map)); + } + } else { + // The ancestor is in a different subgraph, so record the + // dependency. + subgraph_ancestors_[subgraph_name].insert(cluster.subgraph); + TF_RETURN_IF_ERROR(CheckClusterDependencyForCycles( + cluster.subgraph, subgraph_name, subgraph_ancestors_, + node_ancestors_map, &cycle_detector, &cycle_detector_map)); + } + } + } + } + } + return Status::OK(); +} + Status Encapsulator::MakePrunedGraphCopyAndInline( const Graph& graph, const std::vector& sink_nodes, std::unique_ptr* pruned_graph, @@ -2166,6 +2535,7 @@ Status EncapsulateSubgraphsInFunctions( Encapsulator encapsulator(std::move(group_attribute), std::move(outside_compilation_attribute), &graph_in); + TF_RETURN_IF_ERROR(encapsulator.FindClusterDependencies()); TF_RETURN_IF_ERROR(encapsulator.SplitIntoSubgraphs()); TF_RETURN_IF_ERROR(encapsulator.BuildFunctionDefs( diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc index 8599a7038a..3502d1bb45 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc @@ -74,7 +74,7 @@ bool EqualProtoMap(const ::tensorflow::protobuf::Map& a, if (!compare(elt_a.first, elt_a.second, iter->second)) { if (diff) { *diff = strings::StrCat(map_name, " expected: element with key '", - key_to_string(elt_a.first), " has value '", + 
key_to_string(elt_a.first), "' has value '", value_to_string(elt_a.second), "' got: '", value_to_string(iter->second), "'"); } @@ -121,8 +121,22 @@ bool EqualFunctionNodeDef(const NodeDef& a, const NodeDef& b, } return false; } + std::unordered_set control_input_a; + std::unordered_set control_input_b; for (int i = 0; i < a.input_size(); ++i) { - if (a.input(i) != b.input(i)) { + if (str_util::StartsWith(a.input(i), "^")) { + if (!str_util::StartsWith(b.input(i), "^")) { + if (diff) { + *diff = strings::StrCat( + diff_preamble, " mismatch for node ", a.name(), " input ", i, + ", expected control input ", a.input(i), " got ", b.input(i), + " expected:\n", a.DebugString(), "\ngot:\n", b.DebugString()); + } + return false; + } + control_input_a.insert(a.input(i)); + control_input_b.insert(b.input(i)); + } else if (a.input(i) != b.input(i)) { if (diff) { *diff = strings::StrCat(diff_preamble, " mismatch for node ", a.name(), " input ", i, ", expected ", a.input(i), @@ -132,11 +146,29 @@ bool EqualFunctionNodeDef(const NodeDef& a, const NodeDef& b, return false; } } + if (control_input_a != control_input_b) { + if (diff) { + *diff = strings::StrCat(diff_preamble, " mismatch for node ", a.name(), + " control inputs differ expected:\n", + a.DebugString(), "\ngot:\n", b.DebugString()); + } + return false; + } return EqualProtoMap( a.attr(), b.attr(), [](const string& s) { return s; }, [](const AttrValue& v) { return v.DebugString(); }, [](const string& key, const AttrValue& av, const AttrValue& bv) { - return av.DebugString() == bv.DebugString(); + if (key == "ancestors") { + // The ancestors are added from a set so the order is unpredictable; + // just compare set equality not list equality. 
+ std::unordered_set a_set(av.list().s().begin(), + av.list().s().end()); + std::unordered_set b_set(bv.list().s().begin(), + bv.list().s().end()); + return a_set == b_set; + } else { + return av.DebugString() == bv.DebugString(); + } }, strings::StrCat(diff_preamble, " attr mismatch for node ", a.name()), diff); @@ -261,6 +293,7 @@ REGISTER_OP("XlaHostCompute") .Output("outputs: Toutputs") .Attr("Tinputs: list(type) >= 0") .Attr("Toutputs: list(type) >= 0") + .Attr("ancestors: list(string) >= 0") .Attr("key: string") .Attr("shape_inference_graph: string = ''") .Attr("shapes: list(shape) >= 0") @@ -899,6 +932,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { {"C:o:0", "c:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", "_outside_compilation_shape_inference_F1_O1"}, @@ -1044,17 +1078,20 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { {"D:o:0", "F:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", + gtl::ArraySlice({"outside_compilation_O1_host_compute"})}, {"key", "host_compute_channel_F1_O2"}, {"shape_inference_graph", "_outside_compilation_shape_inference_F1_O2"}, {"shapes", gtl::ArraySlice({})}, {"_outside_compilation_subgraph", "O2"}}, - {"F"}}, + {"F", "outside_compilation_O1_host_compute"}}, {{"outside_compilation_O1_host_compute"}, "XlaHostCompute", {"C:o:0", "D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", "_outside_compilation_shape_inference_F1_O1"}, @@ -1193,6 +1230,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { {"C:o:0", "D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", 
gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", "_outside_compilation_shape_inference_F1_O1"}, @@ -1215,6 +1253,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { {"G:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F2_O1"}, {"shape_inference_graph", ""}, {"shapes", @@ -1279,6 +1318,179 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); } +// Test with two functions to transform, each with one outside_compilation +// cluster, with the dependency between them purely from an outside_compilation +// edge. +TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutsideDependencyFromOutside) { + FunctionDefLibrary library; + GraphDef graphdef; + + { + GraphDefBuilder b1(GraphDefBuilder::kFailImmediately); + Node* a = InputShaped(b1.opts().WithName("A")); + Node* b = InputShaped(b1.opts().WithName("B")); + Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1")); + Node* d = + Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1")); + Node* e = Binary(c, d, + b1.opts() + .WithName("E") + .WithControlInputs({b, d}) + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* f = Binary(c, e, + b1.opts().WithName("F").WithControlInput(e).WithAttr( + "_encapsulate", "F1")); + Node* g = + Binary(a, b, b1.opts().WithName("G").WithAttr("_encapsulate", "F2")); + Node* h = Unary(g, b1.opts() + .WithName("H") + .WithAttr("_encapsulate", "F2") + .WithAttr("_outside", "O1") + .WithControlInput(e)); + Node* i = Unary(h, b1.opts().WithName("I").WithAttr("_encapsulate", "F2")); + Binary(f, i, b1.opts().WithName("J")); + TF_EXPECT_OK(b1.ToGraphDef(&graphdef)); + } + + TF_EXPECT_OK(Encapsulate(&graphdef, &library)); + + FunctionDefLibrary library_expected; + GraphDef graphdef_expected; + + { + GraphDefBuilder 
shape(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape.opts().WithName("KnownShape/_0")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, shape.opts()); + Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1), + shape.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape.opts()); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape, "F1_O1", &library_expected)); + } + + { + GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape.opts().WithName("KnownShape/_0")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F2", "O1", + {DT_FLOAT}, shape.opts()); + Node* h = Unary(recv, shape.opts() + .WithName("H") + .WithAttr("_encapsulate", "F2") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F2", "O1", {h}, shape.opts()); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape, "F2_O1", &library_expected)); + } + + *library_expected.add_function() = FunctionDefHelper::Create( + "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval:float"}, {}, + { + {{"C"}, "UnaryTest", {"a_0_arg"}}, + {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}}, + {{"F"}, + "BinaryTest", + {"C:o:0", "outside_compilation_O1_host_compute:outputs:0"}, + {}, + {"outside_compilation_O1_host_compute"}}, + {{"outside_compilation_O1_host_compute"}, + "XlaHostCompute", + {"C:o:0", "D:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, + {"key", "host_compute_channel_F1_O1"}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O1"}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}, + {"D"}}, + }, + {{"f_0_retval", "F:o:0"}}); + + *library_expected.add_function() = FunctionDefHelper::Create( + 
"F2", {"a_0_arg:float", "b_0_arg:float"}, {"i_0_retval:float"}, {}, + { + {{"G"}, "BinaryTest", {"a_0_arg", "b_0_arg"}}, + {{"I"}, + "UnaryTest", + {"outside_compilation_O1_host_compute:outputs:0"}}, + {{"outside_compilation_O1_host_compute"}, + "XlaHostCompute", + {"G:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, + {"key", "host_compute_channel_F2_O1"}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F2_O1"}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}}, + }, + {{"i_0_retval", "I:o:0"}}); + + { + std::unique_ptr lib_def( + new FunctionLibraryDefinition(OpRegistry::Global(), library_expected)); + GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get()); + Node* a = InputShaped(b2.opts().WithName("A")); + Node* b = InputShaped(b2.opts().WithName("B")); + + Node* key_constant1 = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv1 = RecvAtHost(ops::NodeOut(key_constant1, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, b2.opts()); + Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1), + b2.opts() + .WithName("E") + .WithControlInputs({recv1, b}) + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send1 = SendFromHost(ops::NodeOut(key_constant1, 0), "F1", "O1", {e}, + b2.opts().WithControlInput(e)); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}), + "F1"); + + NodeBuilder node_builder1("F1", "F1", lib_def.get()); + node_builder1.Input(a).Input(b); + Node* call1 = + b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1); + + Node* key_constant2 = + KeyPlaceholder("F2", b2.opts().WithName("F2_key_placeholder")); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant2, 0), "F2", "O1", + {DT_FLOAT}, b2.opts()); + Node* h = Unary(recv2, b2.opts() + .WithName("H") + .WithAttr("_encapsulate", "F2") + 
.WithAttr("_outside", "O1") + .WithControlInput(e)); + Node* send2 = SendFromHost(ops::NodeOut(key_constant2, 0), "F2", "O1", {h}, + b2.opts()); + + Node* s2 = Sequencer( + b2.opts().WithName("F2_sequencer").WithControlInputs({recv2, send2}), + "F2"); + NodeBuilder node_builder2("F2", "F2", lib_def.get()); + node_builder2.Input(a).Input(b); + Node* call2 = b2.opts() + .WithControlInputs({s2, call1}) + .FinalizeBuilder(&node_builder2); + Binary(call1, call2, b2.opts().WithName("J")); + TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); + } + + TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef); + TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); +} + // Test with one outside_compilation cluster that has no inputs from the // compiled subgraph. TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) { @@ -1323,6 +1535,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) { {}, {{"Tinputs", gtl::ArraySlice({})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", ""}, {"shapes", @@ -1406,6 +1619,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) { {}, {{"Tinputs", gtl::ArraySlice({})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", ""}, {"shapes", @@ -1487,6 +1701,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) { {"D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", ""}, {"shapes", gtl::ArraySlice({})}, @@ -1567,6 +1782,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) { {"D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", ""}, 
{"shapes", gtl::ArraySlice({})}, @@ -1607,6 +1823,371 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) { TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); } +// Test with two outside_compilation clusters that interact outside the compiled +// subgraph, where the ancestor has no HostCompute Op. +TEST(EncapsulateSubgraphsTest, + OutsideCompilationClusterDependencyNoSrcCluster) { + FunctionDefLibrary library; + GraphDef graphdef; + + { + GraphDefBuilder b1(GraphDefBuilder::kFailImmediately); + Node* a = Input(b1.opts().WithName("A")); + Node* b = Input(b1.opts().WithName("B")); + Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1")); + Node* d = + Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1")); + Node* e = Unary(a, b1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* f = Unary(d, b1.opts().WithName("F").WithAttr("_encapsulate", "F1")); + Node* g = Unary(f, b1.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* h = Unary(g, b1.opts().WithName("H").WithAttr("_encapsulate", "F1")); + Binary(e, h, b1.opts().WithName("I")); + TF_EXPECT_OK(b1.ToGraphDef(&graphdef)); + } + + TF_EXPECT_OK(Encapsulate(&graphdef, &library)); + + FunctionDefLibrary library_expected; + GraphDef graphdef_expected; + + { + GraphDefBuilder shape2(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape2.opts().WithName("KnownShape/_0")); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", + {DT_FLOAT}, shape2.opts()); + Node* g = Unary(ops::NodeOut(recv2, 0), shape2.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g}, shape2.opts()); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape2, "F1_O2", &library_expected)); + } + + *library_expected.add_function() = 
FunctionDefHelper::Create( + "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval:float"}, {}, + { + {{"C"}, "UnaryTest", {"a_0_arg"}}, + {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}}, + {{"F"}, "UnaryTest", {"D:o:0"}}, + {{"H"}, + "UnaryTest", + {"outside_compilation_O2_host_compute:outputs:0"}}, + {{"outside_compilation_O2_host_compute"}, + "XlaHostCompute", + {"F:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, + {"key", "host_compute_channel_F1_O2"}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O2"}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O2"}}}, + }, + {{"h_0_retval", "H:o:0"}}); + + { + std::unique_ptr lib_def( + new FunctionLibraryDefinition(OpRegistry::Global(), library_expected)); + GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get()); + Node* a = Input(b2.opts().WithName("A")); + Node* b = Input(b2.opts().WithName("B")); + + Node* e = Unary(a, b2.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", + {DT_FLOAT}, b2.opts()); + Node* g = Unary(recv, b2.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* send = + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g}, b2.opts()); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}), + "F1"); + NodeBuilder node_builder1("F1", "F1", lib_def.get()); + node_builder1.Input(a).Input(b).ControlInput(s1); + Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); + + Binary(e, call1, b2.opts().WithName("I")); + TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); + } + + TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef); + 
TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); +} + +// Test with two outside_compilation clusters that interact outside the compiled +// subgraph, where the successor has no HostCompute Op. +TEST(EncapsulateSubgraphsTest, + OutsideCompilationClusterDependencyNoDstCluster) { + FunctionDefLibrary library; + GraphDef graphdef; + + { + GraphDefBuilder b1(GraphDefBuilder::kFailImmediately); + Node* a = Input(b1.opts().WithName("A")); + Node* b = Input(b1.opts().WithName("B")); + Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1")); + Node* d = + Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1")); + Node* e = Unary(d, b1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* f = Unary(e, b1.opts().WithName("F").WithAttr("_encapsulate", "F1")); + /*Node* g =*/Unary(a, b1.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* h = Unary(f, b1.opts().WithName("H").WithAttr("_encapsulate", "F1")); + Binary(e, h, b1.opts().WithName("I")); + TF_EXPECT_OK(b1.ToGraphDef(&graphdef)); + } + + TF_EXPECT_OK(Encapsulate(&graphdef, &library)); + + FunctionDefLibrary library_expected; + GraphDef graphdef_expected; + + { + GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape1.opts().WithName("KnownShape/_0")); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, shape1.opts()); + Node* e = Unary(ops::NodeOut(recv2, 0), shape1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape1.opts()); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected)); + } + + *library_expected.add_function() = FunctionDefHelper::Create( + "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval:float"}, {}, + { + {{"C"}, "UnaryTest", 
{"a_0_arg"}}, + {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}}, + {{"F"}, + "UnaryTest", + {"outside_compilation_O1_host_compute:outputs:0"}}, + {{"H"}, "UnaryTest", {"F:o:0"}}, + {{"outside_compilation_O1_host_compute"}, + "XlaHostCompute", + {"D:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, + {"key", "host_compute_channel_F1_O1"}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O1"}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}}, + }, + {{"h_0_retval", "H:o:0"}}); + + { + std::unique_ptr lib_def( + new FunctionLibraryDefinition(OpRegistry::Global(), library_expected)); + GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get()); + Node* a = Input(b2.opts().WithName("A")); + Node* b = Input(b2.opts().WithName("B")); + + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, b2.opts()); + Node* e = Unary(recv, b2.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send = + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts()); + /*Node* g =*/Unary(a, b2.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}), + "F1"); + NodeBuilder node_builder1("F1", "F1", lib_def.get()); + node_builder1.Input(a).Input(b).ControlInput(s1); + Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); + + Binary(e, call1, b2.opts().WithName("I")); + TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); + } + + TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef); + TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); +} + +// Test with two outside_compilation clusters that interact outside the compiled +// 
subgraph. +TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) { + FunctionDefLibrary library; + GraphDef graphdef; + + { + GraphDefBuilder b1(GraphDefBuilder::kFailImmediately); + Node* a = Input(b1.opts().WithName("A")); + Node* b = Input(b1.opts().WithName("B")); + Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1")); + Node* d = + Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1")); + Node* e = Unary(d, b1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* f = Unary(e, b1.opts().WithName("F").WithAttr("_encapsulate", "F1")); + Node* g = Unary(d, b1.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* h = Unary(f, b1.opts().WithName("H").WithAttr("_encapsulate", "F1")); + /*Node* i =*/Binary(d, e, + b1.opts() + .WithName("I") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O3") + .WithControlInput(g)); + Binary(e, h, b1.opts().WithName("J")); + TF_EXPECT_OK(b1.ToGraphDef(&graphdef)); + } + + TF_EXPECT_OK(Encapsulate(&graphdef, &library)); + + FunctionDefLibrary library_expected; + GraphDef graphdef_expected; + + { + GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape1.opts().WithName("KnownShape/_0")); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, shape1.opts()); + Node* e = Unary(ops::NodeOut(recv2, 0), shape1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape1.opts()); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected)); + } + + *library_expected.add_function() = FunctionDefHelper::Create( + "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval:float"}, {}, + {{{"C"}, "UnaryTest", {"a_0_arg"}}, + {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}}, + {{"F"}, 
"UnaryTest", {"outside_compilation_O1_host_compute:outputs:0"}}, + {{"H"}, "UnaryTest", {"F:o:0"}}, + {{"outside_compilation_O1_host_compute"}, + "XlaHostCompute", + {"D:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, + {"key", "host_compute_channel_F1_O1"}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O1"}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O1"}}}, + {{"outside_compilation_O2_host_compute"}, + "XlaHostCompute", + {"D:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({})}, + {"ancestors", + gtl::ArraySlice({"outside_compilation_O1_host_compute"})}, + {"key", "host_compute_channel_F1_O2"}, + {"shape_inference_graph", ""}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O2"}}, + {"outside_compilation_O1_host_compute"}}, + {{"outside_compilation_O3_host_compute"}, + "XlaHostCompute", + {"D:o:0"}, + {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, + {"Toutputs", gtl::ArraySlice({})}, + {"ancestors", + gtl::ArraySlice({"outside_compilation_O1_host_compute", + "outside_compilation_O2_host_compute"})}, + {"key", "host_compute_channel_F1_O3"}, + {"shape_inference_graph", ""}, + {"shapes", gtl::ArraySlice({})}, + {"_outside_compilation_subgraph", "O3"}}, + {"outside_compilation_O1_host_compute", + "outside_compilation_O2_host_compute"}}}, + {{"h_0_retval", "H:o:0"}}); + + { + std::unique_ptr lib_def( + new FunctionLibraryDefinition(OpRegistry::Global(), library_expected)); + GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get()); + Node* a = Input(b2.opts().WithName("A")); + Node* b = Input(b2.opts().WithName("B")); + + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, b2.opts()); + Node* e = Unary(recv1, b2.opts() + .WithName("E") + 
.WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send = + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts()); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", + {DT_FLOAT}, b2.opts()); + Node* g = Unary(recv2, b2.opts() + .WithName("G") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2") + .WithControlInput(e)); + Node* recv3 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O3", + {DT_FLOAT}, b2.opts()); + /*Node* i =*/Binary(recv3, e, + b2.opts() + .WithName("I") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O3") + .WithControlInput(g)); + Node* s1 = Sequencer(b2.opts() + .WithName("F1_sequencer") + .WithControlInputs({recv1, send, recv2, recv3}), + "F1"); + NodeBuilder node_builder1("F1", "F1", lib_def.get()); + node_builder1.Input(a).Input(b).ControlInput(s1); + Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); + + Binary(e, call1, b2.opts().WithName("J")); + TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); + } + + TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef); + TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); +} + // Test with one outside_compilation cluster that has no outputs from the // compiled subgraph. 
TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputsOrOutputs) { @@ -1731,6 +2312,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { {"c:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, + {"ancestors", gtl::ArraySlice({})}, {"key", "host_compute_channel_F1_O1"}, {"shape_inference_graph", "_outside_compilation_shape_inference_F1_O1"}, diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 86263d847a..c0e9967684 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -813,4 +813,29 @@ Status XlaCompiler::SetHostToDeviceMetadata( return Status::OK(); } +Status XlaCompiler::GetHostComputeControlDependency( + const string& host_compute_name, xla::ComputationDataHandle* handle) { + const auto iter = host_compute_control_output_.find(host_compute_name); + if (iter == host_compute_control_output_.end()) { + return errors::InvalidArgument( + "No registered control handle for host compute Op '", host_compute_name, + "'"); + } else { + *handle = iter->second; + } + return Status::OK(); +} + +Status XlaCompiler::SetHostComputeControlDependency( + const string& host_compute_name, const xla::ComputationDataHandle& handle) { + if (host_compute_control_output_.find(host_compute_name) != + host_compute_control_output_.end()) { + return errors::InvalidArgument( + "Duplicate control handles registered for for host compute Op ", + host_compute_name); + } + host_compute_control_output_[host_compute_name] = handle; + return Status::OK(); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index a6747bbe72..8f564f35ec 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -325,6 +325,23 @@ class XlaCompiler { gtl::ArraySlice types, gtl::ArraySlice shapes); + // In order to avoid deadlocks 
from dependencies in host computations, it can + // be necessary to enforce a partial order on the execution of HostCompute + // Ops. In particular it may be necessary to constrain the SendToHost for one + // HostCompute to run before blocking on the RecvAtHost for another + // HostCompute. The compiler maintains a mapping from 'host_compute_name' to + // handle, where the handle is an 'output' of the HostCompute Op corresponding + // to 'host_compute_name'. Another HostCompute Op that needs to be sequenced + // later can add the handle as an 'input' to enforce the constraints. + // 'host_compute_name' can be any string the client wishes to use to identify + // a given HostCompute Op as long as the names are unique within the + // compilation. + Status GetHostComputeControlDependency(const string& host_compute_name, + xla::ComputationDataHandle* handle); + Status SetHostComputeControlDependency( + const string& host_compute_name, + const xla::ComputationDataHandle& handle); + const Options& options() const { return options_; } xla::Client* client() const { return options_.client; } FunctionLibraryRuntime* flib_runtime() const { return flib_runtime_; } @@ -391,6 +408,9 @@ class XlaCompiler { std::unordered_map host_compute_sends_; std::unordered_map host_compute_recvs_; + std::unordered_map + host_compute_control_output_; + TF_DISALLOW_COPY_AND_ASSIGN(XlaCompiler); }; -- GitLab From d82d04f15992e224743f29aa75134ed04aa064a7 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 20 Apr 2018 13:58:51 -0700 Subject: [PATCH 094/434] Automated g4 rollback of changelist 193694958 PiperOrigin-RevId: 193718607 --- .../core/distributed_runtime/master_session.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index 1c67b42e76..ebe350d313 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ 
b/tensorflow/core/distributed_runtime/master_session.cc @@ -89,10 +89,6 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { ~ReffedClientGraph() override { if (should_deregister_) { DeregisterPartitions(); - } else { - for (Part& part : partitions_) { - worker_cache_->ReleaseWorker(part.name, part.worker); - } } } @@ -1178,8 +1174,14 @@ Status MasterSession::Create(GraphDef* graph_def, TF_RETURN_IF_ERROR(GraphExecutionState::MakeForBaseGraph( graph_def, execution_options, &execution_state_)); } - should_delete_worker_sessions_ = true; - return CreateWorkerSessions(options); + // TODO(b/36574172): Remove these conditions when ClusterSpec + // propagation is supported in all servers. + if (options.cluster_def != nullptr || + session_opts_.config.isolate_session_state()) { + should_delete_worker_sessions_ = true; + return CreateWorkerSessions(options); + } + return Status::OK(); } Status MasterSession::CreateWorkerSessions( -- GitLab From 9fc5bacba49eb31c7d536963879ccc62ecfbaf76 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 20 Apr 2018 14:25:57 -0700 Subject: [PATCH 095/434] Pin rbe-debian8-tf container tp a newer base image - Also improve how numpy is installed (not compiling from source) for containers based on other distros than Ubuntu14.04 PiperOrigin-RevId: 193722848 --- tensorflow/tools/ci_build/Dockerfile.rbe.cpu | 2 +- .../tools/ci_build/install/install_pip_packages.sh | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cpu b/tensorflow/tools/ci_build/Dockerfile.rbe.cpu index 6f0798b1af..3bc52b9ed6 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cpu +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cpu @@ -1,4 +1,4 @@ -FROM launcher.gcr.io/google/rbe-debian8:r322167 +FROM launcher.gcr.io/google/rbe-debian8:r327695 LABEL maintainer="Yu Yi " # Copy install scripts diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index 9644277fab..5aaf544afd 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -65,8 +65,13 @@ rm -rf /usr/lib/python3/dist-packages/six* # numpy needs to be installed from source to fix segfaults. See: # https://github.com/tensorflow/tensorflow/issues/6968 # This workaround isn't needed for Ubuntu 16.04 or later. -pip2 install --no-binary=:all: --upgrade numpy==1.12.0 -pip3 install --no-binary=:all: --upgrade numpy==1.12.0 +if $(cat /etc/*-release | grep -q 14.04); then + pip2 install --no-binary=:all: --upgrade numpy==1.12.0 + pip3 install --no-binary=:all: --upgrade numpy==1.12.0 +else + pip2 install --upgrade numpy==1.12.0 + pip3 install --upgrade numpy==1.12.0 +fi pip2 install scipy==0.18.1 pip3 install scipy==0.18.1 -- GitLab From 9f312f32091534bfc115212d2ec7c838180df663 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 20 Apr 2018 14:30:48 -0700 Subject: [PATCH 096/434] Updating Generate Random Tensor to generate tensors whose values are small and do not cause overflow for arithmetic operations. PiperOrigin-RevId: 193723661 --- tensorflow/core/grappler/optimizers/BUILD | 1 - tensorflow/core/grappler/utils/BUILD | 1 + tensorflow/core/grappler/utils/grappler_test.h | 4 +++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 3ab8d8f584..42c3580d40 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -112,7 +112,6 @@ tf_cc_test( name = "constant_folding_test", srcs = ["constant_folding_test.cc"], shard_count = 5, - tags = ["noasan"], deps = [ ":constant_folding", "//tensorflow/cc:cc_ops", diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index b473f32c45..44ef4a965b 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -128,6 +128,7 @@ cc_library( "//tensorflow/core:direct_session", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core/grappler:grappler_item", diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index e1394b9c35..c2ba5ee7e8 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/public/session_options.h" @@ -62,7 +63,8 @@ class GrapplerTest : public ::testing::Test { Tensor GenerateRandomTensor(const TensorShape& shape) const { typedef typename EnumToDataType::Type T; Tensor tensor(DTYPE, shape); - tensor.flat() = tensor.flat().random(); + for (auto i = 0; i < tensor.NumElements(); i++) + tensor.flat()(i) = i + random::New64() % 10; return tensor; } -- GitLab From bc78f9b060cece8e29a89f7dbcdedcadbc61891d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 14:32:07 -0700 Subject: [PATCH 097/434] internal END_PUBLIC BEGIN_PUBLIC Automated g4 rollback of changelist 193600682 PiperOrigin-RevId: 193723856 --- .../layers/python/layers/rev_block_lib.py | 77 ++----------- .../python/layers/rev_block_lib_test.py | 102 ------------------ 2 files changed, 11 insertions(+), 168 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py index 9f904cc302..02d294c68f 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py @@ -45,7 +45,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest -from tensorflow.python.util import tf_inspect __all__ = ["rev_block", "RevBlock", "recompute_grad"] @@ -430,13 +429,12 @@ def enable_with_args(dec): @enable_with_args -def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False, - tensor_arg_names=None): +def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False): """Decorator that recomputes the function on the 
backwards pass. Args: - fn: the subgraph-producing function to wrap and recompute when computing - gradients. Provide `tensor_arg_names` if not all arguments are `Tensor`s. + fn: a function that takes Tensors (all as positional arguments) and returns + a tuple of Tensors. use_data_dep: `bool`, if `True` will use a dummy data dependency to force the recompute to happen. If `False` will use a control dependency. By default will be `True` if in an XLA context and `False` otherwise. XLA @@ -445,25 +443,17 @@ def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False, that all gradients are produced before any are consumed by downstream ops. If `use_data_dep` is also `True`, will use a data dependency instead of a control dependency. - tensor_arg_names: `list`, names of the `Tensor` arguments to `fn`. If - `None`, assumes all arguments are `Tensor`s. Returns: A wrapped fn that is identical to fn when called, but its activations will be discarded and recomputed on the backwards pass (i.e. on a call to tf.gradients). 
""" - if tensor_arg_names: - if not isinstance(tensor_arg_names, (list, tuple)): - raise TypeError("tensor_arg_names must be a list") @functools.wraps(fn) - def wrapped(*args, **kwargs): - tensor_only_fn, tensor_args = _make_tensor_only(fn, args, kwargs, - tensor_arg_names) + def wrapped(*args): return _recompute_grad( - tensor_only_fn, tensor_args, use_data_dep=use_data_dep, - tupleize_grads=tupleize_grads) + fn, args, use_data_dep=use_data_dep, tupleize_grads=tupleize_grads) return wrapped @@ -473,59 +463,11 @@ def _is_on_tpu(): return control_flow_util.GetContainingXLAContext(ctxt) is not None -def _make_tensor_only(fn, args, kwargs, tensor_arg_names): - """Return fn such that it only takes Tensor args for tensor_arg_names.""" - argspec = tf_inspect.getargspec(fn) - if argspec.varargs is not None or argspec.keywords is not None: - raise ValueError("Function decorated with recompute_grad must not use " - "*args or **kwargs.") - fn_arg_names = list(argspec.args) - - # name_to_arg is a dict of argument name to argument value, including both - # positional and keyword arguments passed. - name_to_arg = {} - # Populate positional arguments. - for name, arg in zip(fn_arg_names[:len(args)], args): - name_to_arg[name] = arg - # Populate keyword arguments. - name_to_arg.update(kwargs) - - # Separate the Tensor arguments from the non-Tensor arguments. - # The default is that all arguments are Tensor arguments. - tensor_arg_names = tensor_arg_names or fn_arg_names - for name in tensor_arg_names: - if name not in name_to_arg: - raise ValueError("Must provide Tensor argument %s" % name) - tensor_args = [name_to_arg[name] for name in tensor_arg_names] - non_tensor_kwargs = dict([(name, arg) for name, arg in name_to_arg.items() - if name not in tensor_arg_names]) - - # Check that Tensor arguments are in fact Tensors and that non-Tensor - # arguments are not. 
- for name, arg in zip(tensor_arg_names, tensor_args): - if not isinstance(arg, framework_ops.Tensor): - raise TypeError("Fn argument %s must be a Tensor." % name) - for name, arg in non_tensor_kwargs.items(): - if isinstance(arg, framework_ops.Tensor): - raise TypeError("Fn argument %s must not be a Tensor." % name) - - # Construct a Tensor-only wrapper function that will pass the non-Tensor - # arguments as well when called. - def tensor_only_fn(*tensors): - all_kwargs = dict(zip(tensor_arg_names, tensors)) - all_kwargs.update(non_tensor_kwargs) - return fn(**all_kwargs) - - return tensor_only_fn, tensor_args - - -def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, - tupleize_grads=False): +def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False): """See recompute_grad.""" for arg in args: if not isinstance(arg, framework_ops.Tensor): raise ValueError("All inputs to function must be Tensors") - use_data_dep_ = use_data_dep if use_data_dep_ == _USE_DEFAULT: use_data_dep_ = _is_on_tpu() @@ -559,11 +501,14 @@ def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, grad_vars = grads[len(inputs):] return grad_inputs, grad_vars - # TODO(rsepassi): Replace with tf.custom_gradient @_fn_with_custom_grad(grad_fn) def fn_with_recompute(*args): cached_vs.append(variable_scope.get_variable_scope()) - cached_arg_scope.append(contrib_framework_ops.current_arg_scope()) + # TODO(rsepassi): Rm conditional in TF 1.4 + if hasattr(contrib_framework_ops, "current_arg_scope"): + cached_arg_scope.append(contrib_framework_ops.current_arg_scope()) + else: + cached_arg_scope.append({}) return fn(*args) return fn_with_recompute(*args) diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py index 66ccc696f9..392a490be1 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py @@ -318,108 +318,6 @@ 
class RecomputeTest(test.TestCase): self.assertEqual(1, len(grads)) self.assertTrue(grads[0] is not None) - def testWithNontensorArgs(self): - @rev_block_lib.recompute_grad(tupleize_grads=True, - tensor_arg_names=["inputs"]) - def layer_with_recompute(inputs, plus=None): - var = variable_scope.get_variable("var", ()) - self.assertFalse(plus) # called with False below - if plus: - return var + inputs - else: - return var * inputs - - inputs = array_ops.ones((), dtypes.float32) - outputs = layer_with_recompute(inputs, plus=False) - loss = math_ops.square(outputs) - grads = gradients_impl.gradients(loss, variables.trainable_variables()) - self.assertEqual(1, len(grads)) - self.assertTrue(grads[0] is not None) - - -class MakeTensorOnlyTest(test.TestCase): - - def testMakeTensorOnly(self): - def fn(a, b, c, d=1, e=None, f=7): - return (a, b, c, d, e, f) - - t1 = array_ops.ones(()) - t2 = array_ops.ones(()) - t3 = array_ops.ones(()) - args = [1, t1, 3, t2] - kwargs = {"e": t3} - tensor_only_fn, tensor_args = rev_block_lib._make_tensor_only( - fn, args, kwargs, ["b", "d", "e"]) - self.assertAllEqual(tensor_args, [t1, t2, t3]) - out = tensor_only_fn(*tensor_args) - self.assertAllEqual(out, (1, t1, 3, t2, t3, 7)) - - def testMakeTensorOnlyPositionalArgsOnly(self): - def fn(a, b, c): - return (a, b, c) - - t1 = array_ops.ones(()) - t2 = array_ops.ones(()) - args = [t1, 3, t2] - tensor_only_fn, tensor_args = rev_block_lib._make_tensor_only( - fn, args, {}, ["a", "c"]) - self.assertAllEqual(tensor_args, [t1, t2]) - out = tensor_only_fn(*tensor_args) - self.assertAllEqual(out, (t1, 3, t2)) - - def testMakeTensorOnlyKwargsArgsOnly(self): - def fn(a=1, b=2, c=3): - return (a, b, c) - - t1 = array_ops.ones(()) - t2 = array_ops.ones(()) - args = [t1] - kwargs = {"c": t2} - tensor_only_fn, tensor_args = rev_block_lib._make_tensor_only( - fn, args, kwargs, ["a", "c"]) - self.assertAllEqual(tensor_args, [t1, t2]) - out = tensor_only_fn(*tensor_args) - self.assertAllEqual(out, (t1, 2, 
t2)) - - def testErrorOnMissingTensorArg(self): - def fn(a, b): - return (a, b) - - with self.assertRaisesWithPredicateMatch( - ValueError, "provide Tensor argument"): - rev_block_lib._make_tensor_only(fn, [], {"b": 2}, ["a"]) - - def testErrorOnSignatureSplats(self): - def fn1(a, *args): - return (a, args) - - err_msg = r"must not use \*args or \*\*kwargs" - with self.assertRaisesWithPredicateMatch(ValueError, err_msg): - rev_block_lib._make_tensor_only(fn1, [1, 2], {}, ["a"]) - - def fn2(a, **kwargs): - return (a, kwargs) - - with self.assertRaisesWithPredicateMatch(ValueError, err_msg): - rev_block_lib._make_tensor_only(fn2, [], {"a": 1, "b": 2}, ["a"]) - - def testErrorOnNonTensorForTensor(self): - def fn(a, b): - return (a, b) - - with self.assertRaisesWithPredicateMatch(TypeError, "must be a Tensor"): - rev_block_lib._make_tensor_only(fn, [2, 3], {}, ["a"]) - - def testErrorOnTensorForNonTensor(self): - def fn(a, b): - return (a, b) - - with self.assertRaisesWithPredicateMatch( - TypeError, "must not be a Tensor"): - t1 = array_ops.ones(()) - t2 = array_ops.ones(()) - rev_block_lib._make_tensor_only(fn, [t1, t2], {}, ["a"]) - class FnWithCustomGradTest(test.TestCase): -- GitLab From b133f8c70622e52f19631fd93d4b87ee21c52ac6 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 20 Apr 2018 14:58:56 -0700 Subject: [PATCH 098/434] Move the guts of TFE_Execute into EagerExecute PiperOrigin-RevId: 193728072 --- tensorflow/c/eager/BUILD | 1 - tensorflow/c/eager/c_api.cc | 531 +----------------- tensorflow/core/common_runtime/eager/BUILD | 21 +- .../core/common_runtime/eager/execute.cc | 489 ++++++++++++++++ .../core/common_runtime/eager/execute.h | 7 + 5 files changed, 508 insertions(+), 541 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index d66386acbd..fae922ea3b 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -31,7 +31,6 @@ tf_cuda_library( "//tensorflow/core/common_runtime/eager:context", 
"//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:execute", - "//tensorflow/core/common_runtime/eager:execute_node", "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core/common_runtime/eager:tensor_handle", "//tensorflow/core/common_runtime/eager:copy_to_device_node", diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index b7a3097208..975bde7c7f 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -34,7 +34,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_set.h" #include "tensorflow/core/common_runtime/eager/copy_to_device_node.h" #include "tensorflow/core/common_runtime/eager/execute.h" -#include "tensorflow/core/common_runtime/eager/execute_node.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/node_def_util.h" @@ -219,9 +218,6 @@ TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) { } return retval; } -} // extern "C" - -extern "C" { TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, TF_Status* status) { @@ -423,531 +419,18 @@ void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name, attr_name, tensorflow::gtl::ArraySlice( funcs.get(), num_values)); } -} // extern "C" - -namespace { - -// Initializes the step stats if needed. -void MaybeInitializeStepStats(tensorflow::StepStats* step_stats, - tensorflow::EagerContext* ctx) { - // Lazily initialize the RunMetadata with information about all devices if - // this is the first call. 
- while (step_stats->dev_stats_size() < ctx->devices()->size()) { - int device_idx = step_stats->dev_stats_size(); - auto* dev_stats = step_stats->add_dev_stats(); - dev_stats->set_device(ctx->devices()->at(device_idx)->name()); - } -} - -int StepStatsDeviceIndex(tensorflow::StepStats* step_stats, - tensorflow::EagerContext* ctx, - tensorflow::Device* device) { - // Find the current device's index. - if (device == nullptr) { - device = ctx->HostCPU(); - } - for (int i = 0; i < ctx->devices()->size(); ++i) { - if (ctx->devices()->at(i) == device || - ctx->devices()->at(i)->name() == device->name()) { - return i; - } - } - // TODO(apassos) do not fall back to host CPU if device is unknown. - return 0; -} - -tensorflow::Status ValidateInputTypeAndPlacement( - tensorflow::EagerContext* ctx, tensorflow::Device* op_device, - tensorflow::EagerOperation* op, const tensorflow::OpKernel* kernel, - tensorflow::RunMetadata* run_metadata) { - tensorflow::Device* host_device = ctx->HostCPU(); - const tensorflow::MemoryTypeVector& memtypes = kernel->input_memory_types(); - if (memtypes.size() != op->Inputs().size()) { - return tensorflow::errors::InvalidArgument( - "expected ", memtypes.size(), " inputs, got ", op->Inputs().size()); - } - for (int i = 0; i < op->Inputs().size(); ++i) { - const tensorflow::Device* expected_device = - memtypes[i] == tensorflow::HOST_MEMORY ? host_device : op_device; - tensorflow::TensorHandle* handle = op->Inputs()[i]; - tensorflow::Device* handle_device = nullptr; - TF_RETURN_IF_ERROR(handle->Device(&handle_device)); - const tensorflow::Device* actual_device = - handle_device == nullptr ? host_device : handle_device; - if (expected_device != actual_device) { - switch (ctx->GetDevicePlacementPolicy()) { - case tensorflow::DEVICE_PLACEMENT_SILENT_FOR_INT32: - // TODO(xpan): See if we could bubble python related error up - // to python level. 
- if (handle->dtype == tensorflow::DT_INT32) { - // Note: enabling silent copies of int32 tensors to match behavior - // of graph mode. - break; - } - TF_FALLTHROUGH_INTENDED; - case tensorflow::DEVICE_PLACEMENT_EXPLICIT: - return tensorflow::errors::InvalidArgument( - "Tensors on conflicting devices:" - " cannot compute ", - op->Name(), " as input #", i, " was expected to be on ", - expected_device->name(), " but is actually on ", - actual_device->name(), " (operation running on ", - op_device->name(), ")", - " Tensors can be copied explicitly using .gpu() or .cpu() " - "methods," - " or transparently copied by using tf.enable_eager_execution(" - "device_policy=tfe.DEVICE_PLACEMENT_SILENT). Copying tensors " - "between devices" - " may slow down your model"); - case tensorflow::DEVICE_PLACEMENT_WARN: - LOG(WARNING) << "before computing " << op->Name() << " input #" << i - << " was expected to be on " << expected_device->name() - << " but is actually on " << actual_device->name() - << " (operation running on " << op_device->name() - << "). This triggers a copy which can be a performance " - "bottleneck."; - break; - case tensorflow::DEVICE_PLACEMENT_SILENT: // Do nothing. - break; - } - // We are only here if the policy is warn or silent copies, so we should - // trigger a copy. - auto pre_time = tensorflow::Env::Default()->NowMicros(); - tensorflow::TensorHandle* copied_tensor = nullptr; - tensorflow::Status status = tensorflow::EagerCopyToDevice( - handle, ctx, expected_device->name().c_str(), &copied_tensor); - if (run_metadata != nullptr) { - auto* step_stats = run_metadata->mutable_step_stats(); - MaybeInitializeStepStats(step_stats, ctx); - // Record the sending on the source device for now. 
- int device_idx = StepStatsDeviceIndex(step_stats, ctx, handle_device); - auto* dev_stats = step_stats->mutable_dev_stats(device_idx); - auto* node_stats = dev_stats->add_node_stats(); - node_stats->set_node_name("_Send"); - node_stats->set_all_start_micros(pre_time); - node_stats->set_op_end_rel_micros( - tensorflow::Env::Default()->NowMicros() - pre_time); - } - if (!status.ok()) { - if (copied_tensor != nullptr) copied_tensor->Unref(); - return tensorflow::errors::Internal( - "Failed copying input tensor from ", actual_device->name(), " to ", - expected_device->name(), " in order to run ", op->Name(), ": ", - status.error_message()); - } - handle->Unref(); - handle = copied_tensor; - (*op->MutableInputs())[i] = copied_tensor; - } - if (handle->dtype != kernel->input_type(i)) { - return tensorflow::errors::InvalidArgument( - "cannot compute ", op->Name(), " as input #", i, - " was expected to be a ", - tensorflow::DataTypeString(kernel->input_type(i)), - " tensor but is a ", tensorflow::DataTypeString(handle->dtype), - " tensor"); - } - } - return tensorflow::Status::OK(); -} - -tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, - tensorflow::EagerContext* ctx, - TF_Status* status) { - tensorflow::DeviceSet ds; - for (tensorflow::Device* d : *ctx->devices()) { - ds.AddDevice(d); - } - tensorflow::DeviceTypeVector final_devices; - status->status = tensorflow::SupportedDeviceTypesForNode( - ds.PrioritizedDeviceTypeList(), ndef, &final_devices); - if (!status->status.ok()) { - return nullptr; - } - if (final_devices.empty()) { - status->status = tensorflow::errors::Internal( - "Could not find valid device for node ", ndef.DebugString()); - return nullptr; - } - for (tensorflow::Device* d : *ctx->devices()) { - if (d->device_type() == final_devices[0].type_string()) { - return d; - } - } - status->status = tensorflow::errors::Unknown( - "Could not find a device for node ", ndef.DebugString()); - return nullptr; -} - -#ifdef TENSORFLOW_EAGER_USE_XLA 
-// Synthesizes and returns a wrapper function over `op`, which must be a -// primitive op (e.g. matmul). -// -// The wrapper function conforms to the function signature expected by -// _XlaLaunchOp, with input params ordered by . For example, if the op has input params , they will be reordered to as the input params to the synthesized function. -// -// It populates `const_input_types`, `arg_input_types` and -// `op_input_to_func_input` based on the reordering results, that the caller can -// use them to build an _XlaLaunchOp. On error, it returns NULL, and sets -// `status` accordingly. -const tensorflow::FunctionDef* OpToFunction( - TFE_Op* op, std::vector* const_input_types, - std::vector* arg_input_types, - tensorflow::gtl::FlatMap* op_input_to_func_input, - TF_Status* status) { - DCHECK(!op->operation.is_function()); - - tensorflow::FunctionDef fdef; - - // Get the OpDef of the op we are trying to encapsulate. - TFE_Context* ctx = op->operation.ctx; - const tensorflow::OpRegistrationData* op_data; - { - status->status = - ctx->context.FindFunctionOpData(op->operation.Name(), &op_data); - if (!status->status.ok()) { - return nullptr; - } - } - const tensorflow::OpDef& op_def = op_data->op_def; - - tensorflow::OpDef* signature = fdef.mutable_signature(); - - // Handle constant inputs. - const std::unordered_set const_inputs( - *tensorflow::XlaOpRegistry::CompileTimeConstantInputs( - op->operation.Name())); - - // First add place holders for the input args, so that we can refer to them by - // position in the next loop. Also tally up the resource inputs. - int num_resource_inputs = 0; - for (int i = 0; i < op_def.input_arg_size(); ++i) { - if (op_def.input_arg(i).type() == tensorflow::DT_RESOURCE) { - ++num_resource_inputs; - } - signature->add_input_arg(); - } - - // Now we map the input params from `op_def` to `signature`, where the param - // ordering for `signature` is: . 
- int const_index = 0; - int arg_index = const_inputs.size(); - int resource_index = op_def.input_arg_size() - num_resource_inputs; - for (int i = 0; i < op_def.input_arg_size(); ++i) { - const tensorflow::OpDef::ArgDef& op_input_arg = op_def.input_arg(i); - tensorflow::OpDef::ArgDef* func_input_arg = nullptr; - if (const_inputs.find(op_input_arg.name()) != const_inputs.end()) { - VLOG(1) << "For const input, mapping op input " << i << " to func input " - << const_index; - (*op_input_to_func_input)[i] = const_index; - func_input_arg = signature->mutable_input_arg(const_index++); - const_input_types->push_back( - static_cast(op->operation.Inputs()[i]->dtype)); - } else if (op_input_arg.type() == tensorflow::DT_RESOURCE) { - VLOG(1) << "For resource input, mapping op input " << i - << " to func input " << resource_index; - (*op_input_to_func_input)[i] = resource_index; - func_input_arg = signature->mutable_input_arg(resource_index++); - } else { - VLOG(1) << "For arg input, mapping op input " << i << " to func input " - << arg_index; - (*op_input_to_func_input)[i] = arg_index; - func_input_arg = signature->mutable_input_arg(arg_index++); - arg_input_types->push_back( - static_cast(op->operation.Inputs()[i]->dtype)); - } - - func_input_arg->set_name(op_input_arg.name()); - func_input_arg->set_type(op->operation.Inputs()[i]->dtype); - } - VLOG(1) << "Added OpDef Inputs: " << fdef.DebugString(); - - // Resources args are at the end of the function input params, and we should - // have iterated over all of them. - DCHECK_EQ(signature->input_arg_size(), resource_index); - - // Make the synthesized function's name unique. - signature->set_name(tensorflow::strings::StrCat( - op_def.name(), func_id_generator.fetch_add(1))); - - // Add the node def and set its input names to match op_def's names. 
- const tensorflow::NodeDef& ndef = - op->operation.MutableAttrs()->BuildNodeDef(); - DCHECK_EQ(signature->input_arg_size(), ndef.input_size()); - *fdef.add_node_def() = ndef; - for (int i = 0; i < op_def.input_arg_size(); ++i) { - fdef.mutable_node_def(0)->set_input(i, op_def.input_arg(i).name()); - } - VLOG(1) << "Added NodeDef: " << fdef.DebugString(); - - // Fix the output names and set output types. - for (int i = 0; i < op_def.output_arg_size(); ++i) { - tensorflow::OpDef::ArgDef* arg = signature->add_output_arg(); - const tensorflow::OpDef::ArgDef& op_def_arg = op_def.output_arg(i); - const string& out_tensor_name = tensorflow::strings::StrCat( - ndef.name(), ":", op_def_arg.name(), ":", 0); - arg->set_name(op_def_arg.name()); - (*fdef.mutable_ret())[op_def_arg.name()] = out_tensor_name; - const string& type_attr = op_def_arg.type_attr(); - if (!type_attr.empty()) { - auto i = ndef.attr().find(type_attr); - if (i == ndef.attr().end()) { - status->status = tensorflow::errors::InvalidArgument( - tensorflow::strings::StrCat("Could not find attr ", type_attr, - " in NodeDef ", ndef.DebugString())); - return nullptr; - } - arg->set_type(i->second.type()); - } - } - VLOG(1) << "Fixed Output names and all types: " << fdef.DebugString(); - - status->status = ctx->context.AddFunctionDef(fdef); - if (!status->status.ok()) return nullptr; - const auto ret = ctx->context.FindFunctionDef(signature->name()); - DCHECK(ret != nullptr); - return ret; -} - -// Builds an _XLALaunchOp as a wrapper over 'op', so that 'op' can be executed -// via XLA. 
-std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { - VLOG(1) << "Creating _XlaLaunchOp for TFE_Op " << op->operation.Name(); - auto launch_op = std::unique_ptr( - TFE_NewOp(op->operation.ctx, "_XlaLaunch", status)); - if (TF_GetCode(status) != TF_OK) return nullptr; - if (op->operation.device) { - TFE_OpSetDevice(launch_op.get(), op->operation.device->name().c_str(), - status); - if (TF_GetCode(status) != TF_OK) return nullptr; - } - - const tensorflow::FunctionDef* fdef; - { fdef = op->operation.ctx->FindFunctionDef(op->operation.Name()); } - std::vector const_input_types; - std::vector arg_input_types; - tensorflow::gtl::FlatMap op_input_to_func_input; - if (fdef == nullptr) { - // See if this is a primitive op, and if so create a function for it, so - // that _XlaLaunchOp can access it. - fdef = OpToFunction(op, &const_input_types, &arg_input_types, - &op_input_to_func_input, status); - if (!status->status.ok()) return nullptr; - } else { - // TODO(hongm): XlaOpRegistry::CompileTimeConstantInputs() does not work for - // functions, so we need to find another way to handle constant inputs. - for (int i = const_input_types.size(); - i < fdef->signature().input_arg_size(); ++i) { - VLOG(1) << "Adding Targs from input arg " << i; - const tensorflow::OpDef::ArgDef& arg = fdef->signature().input_arg(i); - arg_input_types.push_back(static_cast(arg.type())); - } - } - DCHECK(fdef != nullptr); - - // Copy inputs and their devices. - // Since input param reordering may have occurred between `op` and `launch_op` - // via `op_input_to_func_input`, adjust the actual inputs accordingly. 
- *launch_op->operation.MutableInputs() = op->operation.Inputs(); - for (tensorflow::TensorHandle* h : launch_op->operation.Inputs()) { - h->Ref(); - } - if (!op_input_to_func_input.empty()) { - DCHECK_EQ(op->operation.Inputs().size(), op_input_to_func_input.size()); - for (int i = 0; i < op_input_to_func_input.size(); ++i) { - VLOG(1) << "mapping op input " << i << " to func input " - << op_input_to_func_input[i]; - - (*launch_op->operation.MuableInputs())[op_input_to_func_input[i]] = - op->operation.Inputs()[i]; - } - } - launch_op->operation.MutableAttrs()->NumInputs(op->operation.Inputs().size()); - - TFE_OpSetAttrTypeList(launch_op.get(), "Tconstants", const_input_types.data(), - const_input_types.size()); - - // Set Targs and Nresources attrs. - TFE_OpSetAttrTypeList(launch_op.get(), "Targs", arg_input_types.data(), - arg_input_types.size()); - const int num_resource_inputs = fdef->signature().input_arg_size() - - const_input_types.size() - - arg_input_types.size(); - TFE_OpSetAttrInt(launch_op.get(), "Nresources", num_resource_inputs); - - // Set Tresults attr. - std::vector tresults; - for (const tensorflow::OpDef::ArgDef& arg : fdef->signature().output_arg()) { - tresults.push_back(static_cast(arg.type())); - } - TFE_OpSetAttrTypeList(launch_op.get(), "Tresults", tresults.data(), - tresults.size()); - - // Set function attr. 
- tensorflow::AttrValue attr_value; - tensorflow::NameAttrList* func = attr_value.mutable_func(); - func->set_name(fdef->signature().name()); - launch_op->attrs.Set("function", attr_value); - - return launch_op; -} -#endif // TENSORFLOW_EAGER_USE_XLA -} // namespace - -extern "C" { - -void TFE_Execute(TFE_Op* tfe_op, TFE_TensorHandle** retvals, int* num_retvals, +void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, TF_Status* status) { - tensorflow::EagerOperation* op = &tfe_op->operation; - tensorflow::EagerContext* ctx = op->EagerContext(); - status->status = ctx->GetStatus(); + tensorflow::gtl::InlinedVector handle_retvals( + *num_retvals); + status->status = + tensorflow::EagerExecute(&op->operation, &handle_retvals, num_retvals); if (!status->status.ok()) { return; } -#ifdef TENSORFLOW_EAGER_USE_XLA - std::unique_ptr xla_launch_op; - if (op->UseXla() && op->Name() != "_XlaLaunch") { - xla_launch_op = BuildXlaLaunch(op, status); - if (!status->status.ok()) { - return; - } - op = xla_launch_op.get(); - } -#endif // TENSORFLOW_EAGER_USE_XLA - // Ensure all resource-touching ops run in the device the resource is, - // regardless of anything else that has been specified. This is identical to - // the graph mode behavior. - for (int i = 0; i < op->Inputs().size(); ++i) { - tensorflow::Device* input_op_device = nullptr; - status->status = op->Inputs()[i]->OpDevice(&input_op_device); - if (!status->status.ok()) return; - VLOG(2) << "for op " << op->Name() << " input " << i << " " - << tensorflow::DataTypeString(op->Inputs()[i]->dtype) << " " - << (input_op_device == nullptr ? "cpu" : input_op_device->name()) - << " " << (op->Device() == nullptr ? "cpu" : op->Device()->name()); - if (op->Inputs()[i]->dtype == tensorflow::DT_RESOURCE && - (input_op_device != op->Device() || input_op_device == nullptr)) { - tensorflow::Device* d = - input_op_device == nullptr ? 
ctx->HostCPU() : input_op_device; - VLOG(1) << "Changing device of operation " << op->Name() << " to " - << d->name() << " because input #" << i - << " is a resource in this device."; - op->SetDevice(d); - } - } - tensorflow::Device* device = op->Device(); - - tensorflow::Fprint128 cache_key = op->MutableAttrs()->CacheKey( - device == nullptr ? "unspecified" : device->name()); - tensorflow::KernelAndDevice* kernel = ctx->GetCachedKernel(cache_key); - if (kernel == nullptr) { - const tensorflow::NodeDef& ndef = op->MutableAttrs()->BuildNodeDef(); - if (device == nullptr) { - device = SelectDevice(ndef, ctx, status); - if (!status->status.ok()) { - return; - } - } - CHECK(device != nullptr); - if (ctx->LogDevicePlacement()) { - LOG(INFO) << "Executing op " << ndef.op() << " in device " - << device->name(); - } - kernel = new tensorflow::KernelAndDevice(ctx->GetRendezvous()); - // Knowledge of the implementation of Init (and in-turn - // FunctionLibraryRuntime::CreateKernel) tells us that ctx->func_lib_def - // will be accessed, so grab on to the lock. - // See WARNING comment in Execute (before kernel->Run) - would be nice to - // rework to avoid this subtlety. - tensorflow::tf_shared_lock l(*ctx->FunctionsMu()); - status->status = - tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); - if (!status->status.ok()) { - delete kernel; - return; - } - // Update output_dtypes inside `kernel`. 
- const tensorflow::OpDef* op_def = nullptr; - const tensorflow::FunctionDef* function_def = - ctx->FuncLibDef()->Find(ndef.op()); - if (function_def != nullptr) { - op_def = &(function_def->signature()); - } - if (op_def == nullptr) { - status->status = OpDefForOp(ndef.op().c_str(), &op_def); - if (!status->status.ok()) { - return; - } - } - tensorflow::DataTypeVector input_dtypes; - status->status = InOutTypesForNode(ndef, *op_def, &input_dtypes, - kernel->mutable_output_dtypes()); - if (!status->status.ok()) { - return; - } - ctx->AddKernelToCache(cache_key, kernel); - } - const tensorflow::DataTypeVector& output_dtypes = kernel->output_dtypes(); - const int output_dtypes_size = output_dtypes.size(); - if (output_dtypes_size > *num_retvals) { - TF_SetStatus(status, TF_INVALID_ARGUMENT, - tensorflow::strings::StrCat("Expecting ", output_dtypes.size(), - " outputs, but *num_retvals is ", - *num_retvals) - .c_str()); - return; - } - *num_retvals = output_dtypes_size; - if (device == nullptr) { - // TODO(apassos) debug how the assignment below might return a different - // device from the one requested above. - device = kernel->device(); - } - status->status = ValidateInputTypeAndPlacement( - ctx, device, op, kernel->kernel(), - ctx->ShouldStoreMetadata() ? ctx->RunMetadataProto() : nullptr); - if (!status->status.ok()) return; - std::unique_ptr maybe_stats; - if (ctx->ShouldStoreMetadata()) { - maybe_stats.reset(new tensorflow::NodeExecStats); - maybe_stats->set_node_name(op->Name()); - maybe_stats->set_all_start_micros(tensorflow::Env::Default()->NowMicros()); - maybe_stats->set_op_start_rel_micros(0); - maybe_stats->set_scheduled_micros(tensorflow::Env::Default()->NowMicros()); - // TODO(apassos) track referenced tensors - } - if (ctx->Async()) { - // Note that for async mode, execution order will make sure that all - // input handles are ready before executing them. - // TODO(agarwal): Consider executing "cheap" kernels inline for performance. 
- tensorflow::gtl::InlinedVector handle_retvals( - *num_retvals); - tensorflow::uint64 id = ctx->NextId(); - for (int i = 0; i < *num_retvals; ++i) { - tensorflow::TensorHandle* h = - new tensorflow::TensorHandle(id, output_dtypes[i], ctx); - retvals[i] = new TFE_TensorHandle(h); - handle_retvals[i] = h; - } - tensorflow::EagerNode* node = new tensorflow::ExecuteNode( - id, ctx, op->Device(), op->Inputs(), kernel, maybe_stats.release(), - output_dtypes, handle_retvals); - ctx->ExecutorAdd(node); - } else { - // Execute checks if retvals[i] is nullptr or not to figure if it needs to - // allocate it. - tensorflow::gtl::InlinedVector handle_retvals( - *num_retvals); - status->status = tensorflow::EagerExecute( - ctx, op->Device(), op->Inputs(), kernel, maybe_stats.get(), - handle_retvals.data(), *num_retvals); - for (int i = 0; i < *num_retvals; ++i) { - retvals[i] = new TFE_TensorHandle(handle_retvals[i]); - } + for (int i = 0; i < *num_retvals; ++i) { + retvals[i] = new TFE_TensorHandle(handle_retvals[i]); } } diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 00ac4a4e47..13d6b021b5 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -154,26 +154,15 @@ tf_cc_test( cc_library( name = "execute", srcs = ["execute.cc"], - hdrs = ["execute.h"], - deps = [ - ":context", - ":copy_to_device_node", - ":kernel_and_device", - ":tensor_handle", - "//tensorflow/core:core_cpu_lib", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", + hdrs = [ + "execute.h", + "execute_node.h", ], -) - -cc_library( - name = "execute_node", - hdrs = ["execute_node.h"], deps = [ ":context", + ":copy_to_device_node", ":eager_executor", - ":execute", + ":eager_operation", ":kernel_and_device", ":tensor_handle", "//tensorflow/core:core_cpu_lib", diff --git a/tensorflow/core/common_runtime/eager/execute.cc 
b/tensorflow/core/common_runtime/eager/execute.cc index 98e8471102..a514f81e14 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -18,8 +18,10 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_set.h" #include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/copy_to_device_node.h" +#include "tensorflow/core/common_runtime/eager/execute_node.h" #include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/eager/tensor_handle.h" #include "tensorflow/core/framework/step_stats.pb.h" @@ -32,6 +34,493 @@ limitations under the License. namespace tensorflow { +namespace { + +// Initializes the step stats if needed. +void MaybeInitializeStepStats(StepStats* step_stats, EagerContext* ctx) { + // Lazily initialize the RunMetadata with information about all devices if + // this is the first call. + while (step_stats->dev_stats_size() < ctx->devices()->size()) { + int device_idx = step_stats->dev_stats_size(); + auto* dev_stats = step_stats->add_dev_stats(); + dev_stats->set_device(ctx->devices()->at(device_idx)->name()); + } +} + +int StepStatsDeviceIndex(StepStats* step_stats, EagerContext* ctx, + Device* device) { + // Find the current device's index. + if (device == nullptr) { + device = ctx->HostCPU(); + } + for (int i = 0; i < ctx->devices()->size(); ++i) { + if (ctx->devices()->at(i) == device || + ctx->devices()->at(i)->name() == device->name()) { + return i; + } + } + // TODO(apassos) do not fall back to host CPU if device is unknown. 
+ return 0; +} + +Status ValidateInputTypeAndPlacement(EagerContext* ctx, Device* op_device, + EagerOperation* op, const OpKernel* kernel, + RunMetadata* run_metadata) { + Device* host_device = ctx->HostCPU(); + const MemoryTypeVector& memtypes = kernel->input_memory_types(); + if (memtypes.size() != op->Inputs().size()) { + return errors::InvalidArgument("expected ", memtypes.size(), + " inputs, got ", op->Inputs().size()); + } + for (int i = 0; i < op->Inputs().size(); ++i) { + const Device* expected_device = + memtypes[i] == HOST_MEMORY ? host_device : op_device; + TensorHandle* handle = op->Inputs()[i]; + Device* handle_device = nullptr; + TF_RETURN_IF_ERROR(handle->Device(&handle_device)); + const Device* actual_device = + handle_device == nullptr ? host_device : handle_device; + if (expected_device != actual_device) { + switch (ctx->GetDevicePlacementPolicy()) { + case DEVICE_PLACEMENT_SILENT_FOR_INT32: + // TODO(xpan): See if we could bubble python related error up + // to python level. + if (handle->dtype == DT_INT32) { + // Note: enabling silent copies of int32 tensors to match behavior + // of graph mode. + break; + } + TF_FALLTHROUGH_INTENDED; + case DEVICE_PLACEMENT_EXPLICIT: + return errors::InvalidArgument( + "Tensors on conflicting devices:" + " cannot compute ", + op->Name(), " as input #", i, " was expected to be on ", + expected_device->name(), " but is actually on ", + actual_device->name(), " (operation running on ", + op_device->name(), ")", + " Tensors can be copied explicitly using .gpu() or .cpu() " + "methods," + " or transparently copied by using tf.enable_eager_execution(" + "device_policy=tfe.DEVICE_PLACEMENT_SILENT). 
Copying tensors " + "between devices" + " may slow down your model"); + case DEVICE_PLACEMENT_WARN: + LOG(WARNING) << "before computing " << op->Name() << " input #" << i + << " was expected to be on " << expected_device->name() + << " but is actually on " << actual_device->name() + << " (operation running on " << op_device->name() + << "). This triggers a copy which can be a performance " + "bottleneck."; + break; + case DEVICE_PLACEMENT_SILENT: // Do nothing. + break; + } + // We are only here if the policy is warn or silent copies, so we should + // trigger a copy. + auto pre_time = Env::Default()->NowMicros(); + TensorHandle* copied_tensor = nullptr; + Status status = EagerCopyToDevice( + handle, ctx, expected_device->name().c_str(), &copied_tensor); + if (run_metadata != nullptr) { + auto* step_stats = run_metadata->mutable_step_stats(); + MaybeInitializeStepStats(step_stats, ctx); + // Record the sending on the source device for now. + int device_idx = StepStatsDeviceIndex(step_stats, ctx, handle_device); + auto* dev_stats = step_stats->mutable_dev_stats(device_idx); + auto* node_stats = dev_stats->add_node_stats(); + node_stats->set_node_name("_Send"); + node_stats->set_all_start_micros(pre_time); + node_stats->set_op_end_rel_micros(Env::Default()->NowMicros() - + pre_time); + } + if (!status.ok()) { + if (copied_tensor != nullptr) copied_tensor->Unref(); + return errors::Internal("Failed copying input tensor from ", + actual_device->name(), " to ", + expected_device->name(), " in order to run ", + op->Name(), ": ", status.error_message()); + } + handle->Unref(); + handle = copied_tensor; + (*op->MutableInputs())[i] = copied_tensor; + } + if (handle->dtype != kernel->input_type(i)) { + return errors::InvalidArgument( + "cannot compute ", op->Name(), " as input #", i, + " was expected to be a ", DataTypeString(kernel->input_type(i)), + " tensor but is a ", DataTypeString(handle->dtype), " tensor"); + } + } + return Status::OK(); +} + +Status 
SelectDevice(const NodeDef& ndef, EagerContext* ctx, Device** device) { + DeviceSet ds; + for (Device* d : *ctx->devices()) { + ds.AddDevice(d); + } + DeviceTypeVector final_devices; + auto status = SupportedDeviceTypesForNode(ds.PrioritizedDeviceTypeList(), + ndef, &final_devices); + if (!status.ok()) return status; + if (final_devices.empty()) { + return errors::Internal("Could not find valid device for node ", + ndef.DebugString()); + } + for (Device* d : *ctx->devices()) { + if (d->device_type() == final_devices[0].type_string()) { + *device = d; + return Status::OK(); + } + } + return errors::Unknown("Could not find a device for node ", + ndef.DebugString()); +} + +#ifdef TENSORFLOW_EAGER_USE_XLA +// Synthesizes and returns a wrapper function over `op`, which must be a +// primitive op (e.g. matmul). +// +// The wrapper function conforms to the function signature expected by +// _XlaLaunchOp, with input params ordered by . For example, if the op has input params , they will be reordered to as the input params to the synthesized function. +// +// It populates `const_input_types`, `arg_input_types` and +// `op_input_to_func_input` based on the reordering results, that the caller can +// use them to build an _XlaLaunchOp. On error, it returns NULL, and sets +// `status` accordingly. +const FunctionDef* OpToFunction(TFE_Op* op, + std::vector* const_input_types, + std::vector* arg_input_types, + gtl::FlatMap* op_input_to_func_input, + TF_Status* status) { + DCHECK(!op->operation.is_function()); + + FunctionDef fdef; + + // Get the OpDef of the op we are trying to encapsulate. + TFE_Context* ctx = op->operation.ctx; + const OpRegistrationData* op_data; + { + status = ctx->context.FindFunctionOpData(op->operation.Name(), &op_data); + if (!status.ok()) { + return nullptr; + } + } + const OpDef& op_def = op_data->op_def; + + OpDef* signature = fdef.mutable_signature(); + + // Handle constant inputs. 
+ const std::unordered_set const_inputs( + *XlaOpRegistry::CompileTimeConstantInputs(op->operation.Name())); + + // First add place holders for the input args, so that we can refer to them by + // position in the next loop. Also tally up the resource inputs. + int num_resource_inputs = 0; + for (int i = 0; i < op_def.input_arg_size(); ++i) { + if (op_def.input_arg(i).type() == DT_RESOURCE) { + ++num_resource_inputs; + } + signature->add_input_arg(); + } + + // Now we map the input params from `op_def` to `signature`, where the param + // ordering for `signature` is: . + int const_index = 0; + int arg_index = const_inputs.size(); + int resource_index = op_def.input_arg_size() - num_resource_inputs; + for (int i = 0; i < op_def.input_arg_size(); ++i) { + const OpDef::ArgDef& op_input_arg = op_def.input_arg(i); + OpDef::ArgDef* func_input_arg = nullptr; + if (const_inputs.find(op_input_arg.name()) != const_inputs.end()) { + VLOG(1) << "For const input, mapping op input " << i << " to func input " + << const_index; + (*op_input_to_func_input)[i] = const_index; + func_input_arg = signature->mutable_input_arg(const_index++); + const_input_types->push_back( + static_cast(op->operation.Inputs()[i]->dtype)); + } else if (op_input_arg.type() == DT_RESOURCE) { + VLOG(1) << "For resource input, mapping op input " << i + << " to func input " << resource_index; + (*op_input_to_func_input)[i] = resource_index; + func_input_arg = signature->mutable_input_arg(resource_index++); + } else { + VLOG(1) << "For arg input, mapping op input " << i << " to func input " + << arg_index; + (*op_input_to_func_input)[i] = arg_index; + func_input_arg = signature->mutable_input_arg(arg_index++); + arg_input_types->push_back( + static_cast(op->operation.Inputs()[i]->dtype)); + } + + func_input_arg->set_name(op_input_arg.name()); + func_input_arg->set_type(op->operation.Inputs()[i]->dtype); + } + VLOG(1) << "Added OpDef Inputs: " << fdef.DebugString(); + + // Resources args are at the end of the 
function input params, and we should + // have iterated over all of them. + DCHECK_EQ(signature->input_arg_size(), resource_index); + + // Make the synthesized function's name unique. + signature->set_name( + strings::StrCat(op_def.name(), func_id_generator.fetch_add(1))); + + // Add the node def and set its input names to match op_def's names. + const NodeDef& ndef = op->operation.MutableAttrs()->BuildNodeDef(); + DCHECK_EQ(signature->input_arg_size(), ndef.input_size()); + *fdef.add_node_def() = ndef; + for (int i = 0; i < op_def.input_arg_size(); ++i) { + fdef.mutable_node_def(0)->set_input(i, op_def.input_arg(i).name()); + } + VLOG(1) << "Added NodeDef: " << fdef.DebugString(); + + // Fix the output names and set output types. + for (int i = 0; i < op_def.output_arg_size(); ++i) { + OpDef::ArgDef* arg = signature->add_output_arg(); + const OpDef::ArgDef& op_def_arg = op_def.output_arg(i); + const string& out_tensor_name = + strings::StrCat(ndef.name(), ":", op_def_arg.name(), ":", 0); + arg->set_name(op_def_arg.name()); + (*fdef.mutable_ret())[op_def_arg.name()] = out_tensor_name; + const string& type_attr = op_def_arg.type_attr(); + if (!type_attr.empty()) { + auto i = ndef.attr().find(type_attr); + if (i == ndef.attr().end()) { + status = errors::InvalidArgument( + strings::StrCat("Could not find attr ", type_attr, " in NodeDef ", + ndef.DebugString())); + return nullptr; + } + arg->set_type(i->second.type()); + } + } + VLOG(1) << "Fixed Output names and all types: " << fdef.DebugString(); + + status = ctx->context.AddFunctionDef(fdef); + if (!status.ok()) return nullptr; + const auto ret = ctx->context.FindFunctionDef(signature->name()); + DCHECK(ret != nullptr); + return ret; +} + +// Builds an _XLALaunchOp as a wrapper over 'op', so that 'op' can be executed +// via XLA. 
+std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { + VLOG(1) << "Creating _XlaLaunchOp for TFE_Op " << op->operation.Name(); + auto launch_op = std::unique_ptr( + TFE_NewOp(op->operation.ctx, "_XlaLaunch", status)); + if (TF_GetCode(status) != TF_OK) return nullptr; + if (op->operation.device) { + TFE_OpSetDevice(launch_op.get(), op->operation.device->name().c_str(), + status); + if (TF_GetCode(status) != TF_OK) return nullptr; + } + + const FunctionDef* fdef; + { fdef = op->operation.ctx->FindFunctionDef(op->operation.Name()); } + std::vector const_input_types; + std::vector arg_input_types; + gtl::FlatMap op_input_to_func_input; + if (fdef == nullptr) { + // See if this is a primitive op, and if so create a function for it, so + // that _XlaLaunchOp can access it. + fdef = OpToFunction(op, &const_input_types, &arg_input_types, + &op_input_to_func_input, status); + if (!status.ok()) return nullptr; + } else { + // TODO(hongm): XlaOpRegistry::CompileTimeConstantInputs() does not work for + // functions, so we need to find another way to handle constant inputs. + for (int i = const_input_types.size(); + i < fdef->signature().input_arg_size(); ++i) { + VLOG(1) << "Adding Targs from input arg " << i; + const OpDef::ArgDef& arg = fdef->signature().input_arg(i); + arg_input_types.push_back(static_cast(arg.type())); + } + } + DCHECK(fdef != nullptr); + + // Copy inputs and their devices. + // Since input param reordering may have occurred between `op` and `launch_op` + // via `op_input_to_func_input`, adjust the actual inputs accordingly. 
+ *launch_op->operation.MutableInputs() = op->operation.Inputs(); + for (TensorHandle* h : launch_op->operation.Inputs()) { + h->Ref(); + } + if (!op_input_to_func_input.empty()) { + DCHECK_EQ(op->operation.Inputs().size(), op_input_to_func_input.size()); + for (int i = 0; i < op_input_to_func_input.size(); ++i) { + VLOG(1) << "mapping op input " << i << " to func input " + << op_input_to_func_input[i]; + + (*launch_op->operation.MuableInputs())[op_input_to_func_input[i]] = + op->operation.Inputs()[i]; + } + } + launch_op->operation.MutableAttrs()->NumInputs(op->operation.Inputs().size()); + + TFE_OpSetAttrTypeList(launch_op.get(), "Tconstants", const_input_types.data(), + const_input_types.size()); + + // Set Targs and Nresources attrs. + TFE_OpSetAttrTypeList(launch_op.get(), "Targs", arg_input_types.data(), + arg_input_types.size()); + const int num_resource_inputs = fdef->signature().input_arg_size() - + const_input_types.size() - + arg_input_types.size(); + TFE_OpSetAttrInt(launch_op.get(), "Nresources", num_resource_inputs); + + // Set Tresults attr. + std::vector tresults; + for (const OpDef::ArgDef& arg : fdef->signature().output_arg()) { + tresults.push_back(static_cast(arg.type())); + } + TFE_OpSetAttrTypeList(launch_op.get(), "Tresults", tresults.data(), + tresults.size()); + + // Set function attr. 
+ AttrValue attr_value; + NameAttrList* func = attr_value.mutable_func(); + func->set_name(fdef->signature().name()); + launch_op->attrs.Set("function", attr_value); + + return launch_op; +} +#endif // TENSORFLOW_EAGER_USE_XLA + +} // namespace + +Status EagerExecute(EagerOperation* op, + gtl::InlinedVector* retvals, + int* num_retvals) { + EagerContext* ctx = op->EagerContext(); + auto status = ctx->GetStatus(); + if (!status.ok()) return status; +#ifdef TENSORFLOW_EAGER_USE_XLA + std::unique_ptr xla_launch_op; + if (op->UseXla() && op->Name() != "_XlaLaunch") { + xla_launch_op = BuildXlaLaunch(op, status); + if (!status.ok()) return status; + op = xla_launch_op.get(); + } +#endif // TENSORFLOW_EAGER_USE_XLA + // Ensure all resource-touching ops run in the device the resource is, + // regardless of anything else that has been specified. This is identical to + // the graph mode behavior. + for (int i = 0; i < op->Inputs().size(); ++i) { + Device* input_op_device = nullptr; + status = op->Inputs()[i]->OpDevice(&input_op_device); + if (!status.ok()) return status; + VLOG(2) << "for op " << op->Name() << " input " << i << " " + << DataTypeString(op->Inputs()[i]->dtype) << " " + << (input_op_device == nullptr ? "cpu" : input_op_device->name()) + << " " << (op->Device() == nullptr ? "cpu" : op->Device()->name()); + if (op->Inputs()[i]->dtype == DT_RESOURCE && + (input_op_device != op->Device() || input_op_device == nullptr)) { + Device* d = input_op_device == nullptr ? ctx->HostCPU() : input_op_device; + VLOG(1) << "Changing device of operation " << op->Name() << " to " + << d->name() << " because input #" << i + << " is a resource in this device."; + op->SetDevice(d); + } + } + Device* device = op->Device(); + + Fprint128 cache_key = op->MutableAttrs()->CacheKey( + device == nullptr ? 
"unspecified" : device->name()); + KernelAndDevice* kernel = ctx->GetCachedKernel(cache_key); + if (kernel == nullptr) { + const NodeDef& ndef = op->MutableAttrs()->BuildNodeDef(); + if (device == nullptr) { + status = SelectDevice(ndef, ctx, &device); + if (!status.ok()) return status; + } + CHECK(device != nullptr); + if (ctx->LogDevicePlacement()) { + LOG(INFO) << "Executing op " << ndef.op() << " in device " + << device->name(); + } + kernel = new KernelAndDevice(ctx->GetRendezvous()); + // Knowledge of the implementation of Init (and in-turn + // FunctionLibraryRuntime::CreateKernel) tells us that ctx->func_lib_def + // will be accessed, so grab on to the lock. + // See WARNING comment in Execute (before kernel->Run) - would be nice to + // rework to avoid this subtlety. + tf_shared_lock l(*ctx->FunctionsMu()); + status = KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); + if (!status.ok()) { + delete kernel; + return status; + } + // Update output_dtypes inside `kernel`. + const OpDef* op_def = nullptr; + const FunctionDef* function_def = ctx->FuncLibDef()->Find(ndef.op()); + if (function_def != nullptr) { + op_def = &(function_def->signature()); + } + if (op_def == nullptr) { + status = OpDefForOp(ndef.op().c_str(), &op_def); + if (!status.ok()) return status; + } + DataTypeVector input_dtypes; + status = InOutTypesForNode(ndef, *op_def, &input_dtypes, + kernel->mutable_output_dtypes()); + if (!status.ok()) return status; + ctx->AddKernelToCache(cache_key, kernel); + } + const DataTypeVector& output_dtypes = kernel->output_dtypes(); + const int output_dtypes_size = static_cast(output_dtypes.size()); + if (output_dtypes_size > *num_retvals) { + return errors::InvalidArgument("Expecting ", output_dtypes.size(), + " outputs, but *num_retvals is ", + *num_retvals); + } + *num_retvals = output_dtypes_size; + if (device == nullptr) { + // TODO(apassos) debug how the assignment below might return a different + // device from the one requested above. 
+ device = kernel->device(); + } + status = ValidateInputTypeAndPlacement( + ctx, device, op, kernel->kernel(), + ctx->ShouldStoreMetadata() ? ctx->RunMetadataProto() : nullptr); + if (!status.ok()) return status; + std::unique_ptr maybe_stats; + if (ctx->ShouldStoreMetadata()) { + maybe_stats.reset(new NodeExecStats); + maybe_stats->set_node_name(op->Name()); + maybe_stats->set_all_start_micros(Env::Default()->NowMicros()); + maybe_stats->set_op_start_rel_micros(0); + maybe_stats->set_scheduled_micros(Env::Default()->NowMicros()); + // TODO(apassos) track referenced tensors + } + retvals->resize(*num_retvals); + if (ctx->Async()) { + // Note that for async mode, execution order will make sure that all + // input handles are ready before executing them. + // TODO(agarwal): Consider executing "cheap" kernels inline for performance. + tensorflow::uint64 id = ctx->NextId(); + for (int i = 0; i < *num_retvals; ++i) { + (*retvals)[i] = new TensorHandle(id, output_dtypes[i], ctx); + } + EagerNode* node = + new ExecuteNode(id, ctx, op->Device(), op->Inputs(), kernel, + maybe_stats.release(), output_dtypes, *retvals); + ctx->ExecutorAdd(node); + } else { + // Execute checks if retvals[i] is nullptr or not to figure if it needs to + // allocate it. + status = EagerExecute(ctx, op->Device(), op->Inputs(), kernel, + maybe_stats.get(), retvals->data(), *num_retvals); + } + + return status; +} + Status EagerExecute(EagerContext* ctx, Device* device, const gtl::InlinedVector& op_inputs, KernelAndDevice* kernel, NodeExecStats* maybe_stats, diff --git a/tensorflow/core/common_runtime/eager/execute.h b/tensorflow/core/common_runtime/eager/execute.h index 0f6ad031e1..7c8d7e164d 100644 --- a/tensorflow/core/common_runtime/eager/execute.h +++ b/tensorflow/core/common_runtime/eager/execute.h @@ -17,6 +17,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/eager/context.h" +#include "tensorflow/core/common_runtime/eager/eager_operation.h" #include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/eager/tensor_handle.h" #include "tensorflow/core/framework/step_stats.pb.h" @@ -25,6 +26,12 @@ limitations under the License. namespace tensorflow { +// Utility function that executes a fully constructed EagerOperation. +Status EagerExecute( + EagerOperation* op, + tensorflow::gtl::InlinedVector* retvals, + int* num_retvals); + // Low-level utility to execute the kernel specified by kernel on device device, // with the inputs op_inputs, in the context ctx. Status EagerExecute(EagerContext* ctx, Device* device, -- GitLab From 60a0e2f5261cf72da4e4d8e65b56b695d611b984 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 15:19:59 -0700 Subject: [PATCH 099/434] Do not force default layout when there is no need to. Allow the inner computations to negotiate a root and parameter layouts different from default. 
PiperOrigin-RevId: 193731341 --- tensorflow/compiler/xla/service/BUILD | 3 + .../xla/service/computation_layout.cc | 7 +- .../compiler/xla/service/computation_layout.h | 5 +- .../compiler/xla/service/hlo_instruction.h | 8 + .../compiler/xla/service/layout_assignment.cc | 328 +++++++++++++----- .../compiler/xla/service/layout_assignment.h | 65 +++- tensorflow/compiler/xla/service/service.cc | 5 +- .../compiler/xla/service/tuple_simplifier.cc | 25 +- 8 files changed, 325 insertions(+), 121 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 9555d91817..bc577c173d 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1953,10 +1953,12 @@ cc_library( deps = [ ":computation_layout", ":hlo", + ":hlo_dce", ":hlo_graph_dumper", ":hlo_pass", ":logical_buffer", ":tuple_points_to_analysis", + ":tuple_simplifier", "//tensorflow/compiler/xla:shape_layout", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", @@ -2433,6 +2435,7 @@ cc_library( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/computation_layout.cc b/tensorflow/compiler/xla/service/computation_layout.cc index d2d4f14fce..cb61f3da39 100644 --- a/tensorflow/compiler/xla/service/computation_layout.cc +++ b/tensorflow/compiler/xla/service/computation_layout.cc @@ -23,12 +23,15 @@ limitations under the License. 
namespace xla { -ComputationLayout::ComputationLayout(const ProgramShape& program_shape) +ComputationLayout::ComputationLayout(const ProgramShape& program_shape, + bool ignore_layouts) : result_layout_(program_shape.result()) { for (auto& shape : program_shape.parameters()) { parameter_layouts_.emplace_back(shape); } - SetToDefaultLayout(); + if (ignore_layouts) { + SetToDefaultLayout(); + } } void ComputationLayout::SetToDefaultLayout() { diff --git a/tensorflow/compiler/xla/service/computation_layout.h b/tensorflow/compiler/xla/service/computation_layout.h index 80e102411c..53c3a3f7b7 100644 --- a/tensorflow/compiler/xla/service/computation_layout.h +++ b/tensorflow/compiler/xla/service/computation_layout.h @@ -34,8 +34,9 @@ class ComputationLayout { public: // Constructs a ComputationLayout from a ProgramShape. The layouts of the // parameters and results are set to the default layout. Layouts in the - // ProgramShape are ignored. - explicit ComputationLayout(const ProgramShape& program_shape); + // ProgramShape are ignored if ignore_layouts is true. + explicit ComputationLayout(const ProgramShape& program_shape, + bool ignore_layouts = true); // Returns the layout of a particular parameter. const ShapeLayout& parameter_layout(int64 param_no) const { diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index a5e9aecb9e..f3da3fc256 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -956,6 +956,14 @@ class HloInstruction { void clear_sharding() { sharding_ = nullptr; } // Return true if this operator has a sharding assigned. bool has_sharding() const { return sharding_ != nullptr; } + // Checks whether the instruction has compatible sharding with the other + // instruction. + bool has_compatible_sharding(const HloInstruction* other) const { + if (!has_sharding()) { + return !other->has_sharding(); + } + return other->has_sharding() ? 
sharding() == other->sharding() : false; + } // When creating a new instruction which either replaces, or shifts up (kCopy // insertion case), another instruction, we need to make sure the certain diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 2494569db5..7067b6f86a 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -31,10 +31,12 @@ limitations under the License. #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/service/computation_layout.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_dce.h" #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/logical_buffer.h" +#include "tensorflow/compiler/xla/service/tuple_simplifier.h" #include "tensorflow/compiler/xla/shape_layout.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -400,9 +402,9 @@ string LayoutConstraints::ToString() const { } Status LayoutAssignment::AddMandatoryConstraints( - const ComputationLayout& computation_layout, - const ChannelLayoutConstraints* channel_constraints, - HloComputation* computation, LayoutConstraints* constraints) { + const ComputationLayout* computation_layout, + ChannelLayoutConstraints* channel_constraints, HloComputation* computation, + LayoutConstraints* constraints) { VLOG(3) << "Adding mandatory layout constraints to computation " << computation->name(); @@ -424,11 +426,16 @@ Status LayoutAssignment::AddMandatoryConstraints( TF_RETURN_IF_ERROR(constraints->SetOperandLayout( instruction->outfeed_shape(), instruction, 0)); } else if (instruction->opcode() == HloOpcode::kParameter) { - // Parameter layouts must match 
the respective layout in - // ComputationLayout. - shape_with_layout = - &computation_layout.parameter_layout(instruction->parameter_number()) - .shape(); + if (computation_layout != nullptr) { + const ShapeLayout& parameter_layout = + computation_layout->parameter_layout( + instruction->parameter_number()); + if (parameter_layout.LayoutIsSet()) { + // Parameter layouts must match the respective layout in + // ComputationLayout, if there is one. + shape_with_layout = &parameter_layout.shape(); + } + } } if (shape_with_layout != nullptr) { TF_RETURN_IF_ERROR( @@ -493,9 +500,8 @@ Status LayoutAssignment::AddMandatoryConstraints( HloComputation* body = instruction->while_body(); HloComputation* condition = instruction->while_condition(); const HloInstruction* init = instruction->operand(0); - const ComputationLayout& body_layout = - FindOrDie(computation_layouts_, body); - const ComputationLayout& condition_layout = + ComputationLayout& body_layout = FindOrDie(computation_layouts_, body); + ComputationLayout& condition_layout = FindOrDie(computation_layouts_, condition); // Check a few invariants irrespective of layout. @@ -508,26 +514,19 @@ Status LayoutAssignment::AddMandatoryConstraints( condition_layout.parameter_shape(0))); DCHECK(ShapeUtil::Compatible(body_layout.result_shape(), init->shape())); - // Return error if earlier layout assignment of the embedded computations - // has produced conflicting layouts.
- if (!ShapeUtil::Equal(body_layout.result_shape(), - body_layout.parameter_shape(0))) { - return InternalError( - "Parameter and result of body computation %s of while instruction " - "%s have different layouts: %s vs %s", - body->name().c_str(), instruction->name().c_str(), - ShapeUtil::HumanString(body_layout.result_shape()).c_str(), - ShapeUtil::HumanString(body_layout.parameter_shape(0)).c_str()); + if (body_layout.result_layout() != body_layout.parameter_layout(0)) { + VLOG(2) << "Reset %while body parameter layout: body=" << body->name() + << " while=" << instruction->name() + << " shape=" << body_layout.result_layout().ToString(); + *body_layout.mutable_parameter_layout(0) = body_layout.result_layout(); } - if (!ShapeUtil::Equal(body->root_instruction()->shape(), - condition->parameter_instruction(0)->shape())) { - return InternalError( - "Parameter of condition computation %s of while instruction " - "%s does not match body computation %s result: %s vs %s", - condition->name().c_str(), instruction->name().c_str(), - body->name().c_str(), - ShapeUtil::HumanString(condition_layout.parameter_shape(0)).c_str(), - ShapeUtil::HumanString(body_layout.result_shape()).c_str()); + if (condition_layout.parameter_layout(0) != + body_layout.parameter_layout(0)) { + VLOG(2) << "Reset %while condition parameter layout: cond=" + << condition->name() << " while=" << instruction->name() + << " shape=" << body_layout.parameter_layout(0).ToString(); + *condition_layout.mutable_parameter_layout(0) = + body_layout.parameter_layout(0); } // Constrain the output and the operand of the while instruction to match @@ -557,7 +556,20 @@ Status LayoutAssignment::AddMandatoryConstraints( true_computation_layout.parameter_shape(0))); DCHECK(ShapeUtil::Compatible( false_operand->shape(), false_computation_layout.parameter_shape(0))); - + if (true_computation_layout.result_layout() != + false_computation_layout.result_layout()) { + // We assign layouts in DFS fashion, so the true and false 
computations + // might have negotiated a different layout. But for the conditional + // instruction POV the layout must match, so we run again on the false + // computation, this time with proper computation layout. + VLOG(2) << "Reset %conditional false computation result layout: " + "false_computation=" + << false_computation->name() + << " conditional=" << instruction->name() << " shape=" + << true_computation_layout.result_layout().ToString(); + *false_computation_layout.mutable_result_layout() = + true_computation_layout.result_layout(); + } TF_RETURN_IF_ERROR(constraints->SetInstructionLayout( true_computation_layout.result_shape(), instruction)); TF_RETURN_IF_ERROR(constraints->SetOperandLayout( @@ -593,10 +605,14 @@ Status LayoutAssignment::AddMandatoryConstraints( } } } - - // Finally set the result layout to match ComputationLayout. - return constraints->SetResultLayout( - computation_layout.result_layout().shape()); + // Finally set the result layout to match ComputationLayout, if there is one. 
+ if (computation_layout != nullptr) { + const ShapeLayout& result_layout = computation_layout->result_layout(); + if (result_layout.LayoutIsSet()) { + TF_RETURN_IF_ERROR(constraints->SetResultLayout(result_layout.shape())); + } + } + return Status::OK(); } namespace { @@ -760,6 +776,7 @@ StatusOr LayoutAssignment::CreateCopyWithNewLayout( HloInstruction* copy = instruction->parent()->AddInstruction(HloInstruction::CreateUnary( instruction->shape(), HloOpcode::kCopy, instruction)); + RegisterAddedCopy(copy); SetupCopiedInstruction(*instruction, copy, {}); LayoutUtil::ClearLayout(copy->mutable_shape()); TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( @@ -783,13 +800,19 @@ Status LayoutAssignment::CopyOperandIfLayoutsDiffer( TF_RET_CHECK(LayoutUtil::HasLayout(operand->shape())); if (ShapeUtil::Equal(operand_layout.shape(), operand->shape())) { + VLOG(5) << "Operand " << operand->ToString() << " layout matches in " + << instruction->ToString(); // Operand layout already matches our constraint. Nothing to do. return Status::OK(); } + VLOG(4) << "Operand " << operand->ToString() << " layout does not match " + << operand_layout.ToString() << " in " << instruction->ToString(); TF_ASSIGN_OR_RETURN(HloInstruction * operand_copy, CreateCopyWithNewLayout(operand_layout.shape(), operand)); + VLOG(4) << "New copy of " << operand->ToString() << " is " + << operand_copy->ToString(); return instruction->ReplaceOperandWith(operand_no, operand_copy); } @@ -896,15 +919,16 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { } } } - - // Finally verify the result layout matches the layout of the entry + // Finally verify the result layout, if set, matches the layout of the entry // computation root. 
- TF_RET_CHECK(ShapeUtil::Equal( - module->entry_computation()->root_instruction()->shape(), + const ShapeLayout& result_layout = FindOrDie(computation_layouts_, module->entry_computation()) - .result_layout() - .shape())); - + .result_layout(); + if (result_layout.LayoutIsSet()) { + TF_RET_CHECK(ShapeUtil::Equal( + module->entry_computation()->root_instruction()->shape(), + result_layout.shape())); + } return Status::OK(); } @@ -913,18 +937,13 @@ LayoutAssignment::LayoutAssignment( ChannelLayoutConstraints* channel_constraints) : entry_computation_layout_(entry_computation_layout), channel_layout_constraints_(channel_constraints) { - VLOG(1) << "entry computation layout given to layout assignment: " + VLOG(1) << "Entry computation layout given to layout assignment: " << entry_computation_layout_->ToString(); // Layouts of all parameter instructions must be set. for (const ShapeLayout& parameter_layout : entry_computation_layout_->parameter_layouts()) { CHECK(parameter_layout.LayoutIsSet()); } - // If the result layout is not set, then choose the default. - // TODO(b/29118294): Choose a better layout in this case. - if (!entry_computation_layout_->result_layout().LayoutIsSet()) { - entry_computation_layout_->mutable_result_layout()->SetToDefaultLayout(); - } } std::unique_ptr LayoutAssignment::ChooseOperandLayoutFromOutputLayout( @@ -1484,16 +1503,60 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints, return Status::OK(); } +Status LayoutAssignment::CalculateComputationLayout( + HloComputation* computation) { + ComputationLayout computation_layout(computation->ComputeProgramShape(), + /*ignore_layouts=*/false); + InsertOrDie(&computation_layouts_, computation, computation_layout); + VLOG(2) << " Calculated ComputationLayout = " + << computation_layout.ToString(); + return Status::OK(); +} + +Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { + // Clear existing layouts of the instructions. 
All layouts must be assigned + // by the LayoutAssignment pass, except for those on infeeds, parameters, + // and the computation result. The latter two are specified in + // computation_layout, so we only need to keep the existing layouts for + // infeeds. Clearing the layouts here avoids hiding potential bugs in the + // layout assignment pass that may accidently use the existing layout. + for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kBitcast) { + // bitcasts are inherently layout sensitive and so a bitcast instruction + // present in the IR before layout assignment is a bug. + return InternalError( + "Unexpected bitcast operation seen during layout assignment: %s.", + instruction->ToString().c_str()); + } + if (instruction->opcode() != HloOpcode::kInfeed) { + LayoutUtil::ClearLayout(instruction->mutable_shape()); + } + } + return Status::OK(); +} + Status LayoutAssignment::RunOnComputation( - const ComputationLayout& computation_layout, + ComputationLayout* computation_layout, const TuplePointsToAnalysis& points_to_analysis, HloComputation* computation, ChannelLayoutConstraints* channel_constraints) { - DCHECK(computation_layout.LayoutIsSet()); - InsertOrDie(&computation_layouts_, computation, computation_layout); VLOG(2) << "LayoutAssignment::RunOnComputation(" << computation->name() << ")"; - VLOG(2) << " ComputationLayout = " << computation_layout.ToString(); + TF_RETURN_IF_ERROR(ClearComputationLayouts(computation)); + if (computation_layout != nullptr) { + auto it = computation_layouts_.find(computation); + if (it == computation_layouts_.end()) { + VLOG(2) << " New ComputationLayout = " << computation_layout->ToString(); + computation_layouts_.emplace(computation, *computation_layout); + } else { + TF_RET_CHECK(computation_layout == &it->second || + computation_layout == entry_computation_layout_); + VLOG(2) << " Existing ComputationLayout = " + << computation_layout->ToString(); + } + } else { 
+ VLOG(2) << " No ComputationLayout specified (will be calculated)"; + } // Construct LayoutConstraints with all layout constraints of the computation. LayoutConstraints constraints(points_to_analysis, computation); @@ -1536,12 +1599,19 @@ Status LayoutAssignment::RunOnComputation( CHECK_LT(constraints.unconstrained_buffer_ids().size(), unconstrained_count); } - // All logical buffers should have constraints at this point. All that // remains is assign the constraints to the buffers and infer layouts for // aliased buffers. TF_RETURN_IF_ERROR(AssignLayouts(constraints, computation)); + // If the computation layout wasn't specified, now it is the time to compute + // it according to the parameters and root instruction layouts. + // This allows the first pass through this API to record the best flowing + // layout to parameters and root instruction. + if (computation_layout == nullptr) { + TF_RETURN_IF_ERROR(CalculateComputationLayout(computation)); + } + // Record the layouts assigned for any communication ops in // channel_constraints so that they are constrained for future modules. 
for (HloInstruction* instruction : computation->instructions()) { @@ -1556,6 +1626,34 @@ Status LayoutAssignment::RunOnComputation( return Status::OK(); } +Status LayoutAssignment::PropagateComputationLayouts( + HloComputation* computation, ComputationLayout* computation_layout) { + ComputationLayout computed_computation_layout( + computation->ComputeProgramShape(), + /*ignore_layouts=*/false); + for (int64 i = 0; i < computed_computation_layout.parameter_count(); ++i) { + ShapeLayout* param_layout = computation_layout->mutable_parameter_layout(i); + if (!param_layout->LayoutIsSet()) { + VLOG(4) << "Assigning layout to parameter " << i << " of computation " + << computation->name() << ": " + << computed_computation_layout.parameter_layout(i).ToString(); + *param_layout = computed_computation_layout.parameter_layout(i); + } else { + TF_RET_CHECK(computed_computation_layout.parameter_layout(i) == + *param_layout); + } + } + ShapeLayout* result_layout = computation_layout->mutable_result_layout(); + if (!result_layout->LayoutIsSet()) { + VLOG(4) << "Assigning result layout of computation " << computation->name() + << ": " << computed_computation_layout.result_layout().ToString(); + *result_layout = computed_computation_layout.result_layout(); + } else { + TF_RET_CHECK(computed_computation_layout.result_layout() == *result_layout); + } + return Status::OK(); +} + StatusOr LayoutAssignment::Run(HloModule* module) { VLOG(2) << "Running layout assignment on module " << module->name(); XLA_VLOG_LINES(3, module->ToString()); @@ -1564,52 +1662,45 @@ StatusOr LayoutAssignment::Run(HloModule* module) { "before layout assignment", module->config().debug_options()); } - - TF_ASSIGN_OR_RETURN(auto points_to_analysis, - TuplePointsToAnalysis::Run(module)); - - // Assign layouts to computations in an order such that a callee computation - // is handled before its caller computation. This ensures that the layout of - // all callers of a computation will agree. 
- std::list computation_post_order = - module->MakeComputationPostOrder(); - for (auto* computation : module->MakeComputationPostOrder()) { - if (computation->IsFusionComputation()) { - continue; - } - // Clear existing layouts of the instructions. All layouts must be assigned - // by the LayoutAssignment pass, except for those on infeeds, parameters, - // and the computation result. The latter two are specified in - // computation_layout, so we only need to keep the existing layouts for - // infeeds. Clearing the layouts here avoids hiding potential bugs in the - // layout assignment pass that may accidently use the existing layout. - for (HloInstruction* instruction : computation->instructions()) { - if (instruction->opcode() == HloOpcode::kBitcast) { - // bitcasts are inherently layout sensitive and so a bitcast instruction - // present in the IR before layout assignment is a bug. - return InternalError( - "Unexpected bitcast operation seen during layout assignment: %s.", - instruction->ToString().c_str()); + TF_RETURN_IF_ERROR(Init()); + + // We do two passes. The first one we pass a nullptr ComputationLayout to + // the RunOnComputation() calls (for non entry computations), and we register + // the ComputationLayout which are naturally flowing in DFS fashion to the + // parameters and root instruction. + // Walking in DFS mode though, means that we can end up with incorrect layouts + // when seen from an outer instruction, which has across-computation + // constraints to impose. + // For example, the kWhile instruction needs to enforce the same layouts for + // the parameters and root of the bosy, as well as the condition parameters. + // Similarly, the kConditional instruction needs to enforce the same layouts + // for the root of the true and false computations. 
+ // So in the first pass, while allowing the layouts to flow to parameters and + // root, we also fix up the eventually inconsistent ComputationLayout, which + // will be then made mandatory by the second pass. + for (int64 i = 0; i < 2; ++i) { + TF_RETURN_IF_ERROR(ClearPreviousPassSideEffects(module)); + TF_ASSIGN_OR_RETURN(auto points_to_analysis, + TuplePointsToAnalysis::Run(module)); + for (auto* computation : module->MakeComputationPostOrder()) { + if (computation->IsFusionComputation()) { + continue; } - if (instruction->opcode() != HloOpcode::kInfeed) { - LayoutUtil::ClearLayout(instruction->mutable_shape()); + if (computation == module->entry_computation()) { + TF_RETURN_IF_ERROR(RunOnComputation( + entry_computation_layout_, *points_to_analysis, + module->entry_computation(), channel_layout_constraints_)); + } else { + ComputationLayout* computation_layout = + (i == 0) ? nullptr : &FindOrDie(computation_layouts_, computation); + TF_RETURN_IF_ERROR(RunOnComputation(computation_layout, + *points_to_analysis, computation, + channel_layout_constraints_)); } } - if (computation == module->entry_computation()) { - TF_RETURN_IF_ERROR(RunOnComputation( - *entry_computation_layout_, *points_to_analysis, - module->entry_computation(), channel_layout_constraints_)); - } else { - ComputationLayout computation_layout(computation->ComputeProgramShape()); - // Setting all embedded computations to the default layout is potentially - // suboptimal. 
- computation_layout.SetToDefaultLayout(); - TF_RETURN_IF_ERROR(RunOnComputation(computation_layout, - *points_to_analysis, computation, - channel_layout_constraints_)); - } } - + TF_RETURN_IF_ERROR(PropagateComputationLayouts(module->entry_computation(), + entry_computation_layout_)); TF_RETURN_IF_ERROR(CheckLayouts(module)); VLOG(3) << "After layout assignment:"; @@ -1619,9 +1710,54 @@ StatusOr LayoutAssignment::Run(HloModule* module) { "after layout assignment", module->config().debug_options()); } - // All layouts are reset then reassigned by this pass. return true; } +Status LayoutAssignment::Init() { + computation_layouts_.clear(); + return Status::OK(); +} + +Status LayoutAssignment::ClearPreviousPassSideEffects(HloModule* module) { + // Clear all the copies which have been added, and all the related + // instructions (like GTE and tuples). + int64 removed_copies = 0; + for (HloComputation* computation : module->computations()) { + for (HloInstruction* instruction : + computation->MakeInstructionPostOrder()) { + if (instruction->opcode() == HloOpcode::kCopy && + added_copies_.count(instruction) > 0) { + VLOG(5) << "Removing added copy: " << instruction->ToString(); + TF_RETURN_IF_ERROR( + instruction->ReplaceAllUsesWith(instruction->mutable_operand(0))); + TF_RETURN_IF_ERROR(computation->RemoveInstruction(instruction)); + ++removed_copies; + } + } + } + added_copies_.clear(); + if (removed_copies > 0) { + TupleSimplifier tuple_simplifier; + HloDCE dce; + TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status()); + TF_RETURN_IF_ERROR(dce.Run(module).status()); + } + return Status::OK(); +} + +Status LayoutAssignment::AddCopyForOperand(HloInstruction* instruction, + int64 operand_number) { + HloInstruction* operand = instruction->mutable_operand(operand_number); + if (operand->opcode() != HloOpcode::kCopy || operand->user_count() > 1) { + HloInstruction* copy = + instruction->parent()->AddInstruction(HloInstruction::CreateUnary( + operand->shape(), 
HloOpcode::kCopy, operand)); + SetupCopiedInstruction(*operand, copy, {}); + LayoutUtil::ClearLayout(copy->mutable_shape()); + TF_RETURN_IF_ERROR(instruction->ReplaceOperandWith(operand_number, copy)); + } + return Status::OK(); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index ae4986d6ad..8b4e07995a 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -39,6 +39,7 @@ limitations under the License. #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/flatmap.h" +#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -362,12 +363,15 @@ class LayoutAssignment : public HloPassInterface { int64 operand_no); private: + // Initializes the layout assignment object for a new Run() call. + Status Init(); + // Adds constraints which must be satisfied for correctness on all // backends. Called once prior to propagating constraints. - Status AddMandatoryConstraints( - const ComputationLayout& computation_layout, - const ChannelLayoutConstraints* channel_constraints, - HloComputation* computation, LayoutConstraints* constraints); + Status AddMandatoryConstraints(const ComputationLayout* computation_layout, + ChannelLayoutConstraints* channel_constraints, + HloComputation* computation, + LayoutConstraints* constraints); // This method can be overridden to add backend-specific constraints to the // layout of the instructions of a computation. This method is called after @@ -378,10 +382,12 @@ class LayoutAssignment : public HloPassInterface { } // Construct contraints and assign layouts to all instructions in the - // computation satisfying the given ComputationLayout. Layouts constraints are - // added, then propagated until all LogicalBuffers in the computation are - // constrained. 
- Status RunOnComputation(const ComputationLayout& computation_layout, + // computation satisfying the given ComputationLayout, if not nullptr. + // Otherwise the ComputationLayout will be calculated by propagating the + // computation instruction contraints. + // Layouts constraints are added, then propagated until all LogicalBuffers in + // the computation are constrained. + Status RunOnComputation(ComputationLayout* computation_layout, const TuplePointsToAnalysis& points_to_analysis, HloComputation* computation, ChannelLayoutConstraints* channel_constraints); @@ -402,6 +408,25 @@ class LayoutAssignment : public HloPassInterface { // necessary conditions. Status CheckLayouts(HloModule* module); + // Computes the ComputationLayout of the given computation based of the + // layouts assigned to parameters and root instruction, and inserts it to the + // computation_layouts_ map. + Status CalculateComputationLayout(HloComputation* computation); + + // Clears all the layouts which can be cleared within a computation. + Status ClearComputationLayouts(HloComputation* computation); + + // Clears the side effects of a previous pass, like added copy instructions. + Status ClearPreviousPassSideEffects(HloModule* module); + + // Propagates the layouts computed by the layout assignment pass on the given + // computation, to the computation layout passed in to this API. + // This API propagates missing layout, and also checks that the caller + // specified have been respected, by comparing those with the parameters and + // root computation instruction. + Status PropagateComputationLayouts(HloComputation* computation, + ComputationLayout* computation_layout); + ComputationLayout* entry_computation_layout_; protected: @@ -418,21 +443,37 @@ class LayoutAssignment : public HloPassInterface { // Creates and returns a copy of the given instruction with a different // layout. 
Tuple-shaped instructions will be deep-copied, and the last Tuple // instruction producing the copy is returned. - static StatusOr CreateCopyWithNewLayout( + StatusOr CreateCopyWithNewLayout( const Shape& shape_with_layout, HloInstruction* instruction); // Creates a copy of the given operand if the operand's layout does not match // the given layout. This copy replaces the use in the given instruction. // Tuple operands will be deep-copied. - static Status CopyOperandIfLayoutsDiffer(const ShapeLayout& operand_layout, - HloInstruction* instruction, - int64 operand_no); + Status CopyOperandIfLayoutsDiffer(const ShapeLayout& operand_layout, + HloInstruction* instruction, + int64 operand_no); + + // Registers a copy instruction added by the layout assignment pass. + void RegisterAddedCopy(HloInstruction* copy) { + CHECK_EQ(copy->opcode(), HloOpcode::kCopy); + added_copies_.insert(copy); + } + + // Adds a copy for the operand of an instruction, unless such operand is + // already a copy, and has a single user (which is forcibly the instruction + // itself). + Status AddCopyForOperand(HloInstruction* instruction, int64 operand_number); // Map containing the layouts of all computations assigned so // far. Computations are handled in a topological sort where computations are // handled before their caller instructions so the layouts of caller // instructions can be set to match the computation. std::map computation_layouts_; + + // Every copy added to the module by the layout assignment pass is registered + // here. 
+ tensorflow::gtl::FlatSet added_copies_; + ChannelLayoutConstraints* channel_layout_constraints_; }; diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 39f3aefdf8..a73118c68a 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -308,7 +308,10 @@ StatusOr> Service::CreateModuleConfig( computation_layout->mutable_result_layout()->CopyLayoutFromShape( shape_with_output_layout)); } else { - computation_layout->mutable_result_layout()->Clear(); + // TODO(b/78356948): We are forcing the default layout here. We should fix + // clients which expect a default layout, to be explicit about it, by + // passing the proper ExecutionOptions with shape_with_output_layout set. + computation_layout->mutable_result_layout()->SetToDefaultLayout(); } config->set_replica_count(options_.number_of_replicas()); diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.cc b/tensorflow/compiler/xla/service/tuple_simplifier.cc index 113c2e2bd9..d668855084 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier.cc @@ -69,6 +69,7 @@ StatusOr TupleSimplifier::Run(HloModule* module) { // Tuple // HloInstruction* top_tuple = nullptr; + HloInstruction* first_gte = nullptr; bool can_simplify = true; for (int64 operand_number = 0; operand_number < instruction->operand_count(); ++operand_number) { @@ -78,11 +79,17 @@ StatusOr TupleSimplifier::Run(HloModule* module) { can_simplify = false; break; } - + if (first_gte == nullptr) { + first_gte = operand; + } else if (!first_gte->has_compatible_sharding(operand)) { + can_simplify = false; + break; + } if (top_tuple == nullptr) { top_tuple = operand->mutable_operand(0); if (!ShapeUtil::Compatible(top_tuple->shape(), - instruction->shape())) { + instruction->shape()) || + !instruction->has_compatible_sharding(top_tuple)) { can_simplify = false; break; } @@ -108,15 +115,17 @@ 
StatusOr TupleSimplifier::Run(HloModule* module) { // | // GTE if (instruction->operand(0)->opcode() == HloOpcode::kTuple) { - changed = true; HloInstruction* element_source = instruction->mutable_operand(0)->mutable_operand( instruction->tuple_index()); - TF_RETURN_IF_ERROR(instruction->ReplaceAllUsesWith(element_source)); - for (HloInstruction* user : element_source->users()) { - if (user->opcode() == HloOpcode::kTuple || - user->opcode() == HloOpcode::kGetTupleElement) { - worklist.push(user); + if (instruction->has_compatible_sharding(element_source)) { + changed = true; + TF_RETURN_IF_ERROR(instruction->ReplaceAllUsesWith(element_source)); + for (HloInstruction* user : element_source->users()) { + if (user->opcode() == HloOpcode::kTuple || + user->opcode() == HloOpcode::kGetTupleElement) { + worklist.push(user); + } } } } -- GitLab From 6af31f6260161bab02db83d7e9e1d7ba7fd14b2c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 15:20:37 -0700 Subject: [PATCH 100/434] [XLA] Redesign: add comparator and printer for the XlaOp. This is to prepare the migration of tf2xla. There were some codes used ComputationDataHandle::handle() for comparison/printing. Now implement XlaOp's comparator and printer. 
PiperOrigin-RevId: 193731437 --- .../compiler/xla/client/xla_client/xla_builder.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.h b/tensorflow/compiler/xla/client/xla_client/xla_builder.h index 5977ee4f4b..4955f1515d 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.h @@ -57,11 +57,27 @@ class XlaOp { StatusOr GetShape() const; + const XlaBuilder* builder() const { return builder_; } + + bool operator==(const XlaOp& rhs) const { + return handle_ == rhs.handle_ && builder_ == rhs.builder_; + } + + bool operator!=(const XlaOp& rhs) const { + return handle_ != rhs.handle_ || builder_ != rhs.builder_; + } + + friend std::ostream& operator<<(std::ostream& out, const XlaOp& op) { + out << op.handle(); + return out; + } + private: XlaOp(int64 handle, XlaBuilder* builder) : handle_(handle), builder_(builder) {} int64 handle() const { return handle_; } + friend class XlaBuilder; int64 handle_; -- GitLab From cadbb0b70b9441388a04533433245ac85f2887a9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 15:32:32 -0700 Subject: [PATCH 101/434] [XLA] Redesign: implement DumpToDirectory for the HloSession. This is to prepare the migration of tf2xla. 
PiperOrigin-RevId: 193733029 --- tensorflow/compiler/xla/service/BUILD | 1 + tensorflow/compiler/xla/service/executable.cc | 20 +++++++++++++++++++ tensorflow/compiler/xla/service/executable.h | 5 +++++ 3 files changed, 26 insertions(+) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index bc577c173d..afb344e5ae 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -755,6 +755,7 @@ cc_library( ":hlo", ":hlo_execution_profile", ":hlo_graph_dumper", + ":hlo_proto", ":pool", ":session_proto", ":shaped_buffer", diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc index b097ef79cc..8218b5f7c8 100644 --- a/tensorflow/compiler/xla/service/executable.cc +++ b/tensorflow/compiler/xla/service/executable.cc @@ -163,4 +163,24 @@ Status Executable::DumpSessionModule() { result); } +/* static */ Status Executable::DumpToDirectory(const string& directory_path, + string filename, + const HloSession& hlo_session) { + tensorflow::Env* env = tensorflow::Env::Default(); + if (!env->IsDirectory(directory_path).ok()) { + // NB! CreateDir does not work reliably with multiple XLA threads -- two + // threads can race to observe the absence of the dump directory and + // simultaneously try to create it, causing the "losing" thread to get a + // "directory already exists" error. 
+ TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(directory_path)); + } + filename = SanitizeFileName(std::move(filename)); + string file_path = tensorflow::io::JoinPath(directory_path, filename); + string result; + TF_RET_CHECK( + tensorflow::SerializeToStringDeterministic(hlo_session, &result)); + return tensorflow::WriteStringToFile(tensorflow::Env::Default(), file_path, + result); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index 9c725f21d8..bdbe119120 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h" #include "tensorflow/compiler/xla/service/computation_layout.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_execution_profile.h" #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -155,6 +156,10 @@ class Executable { static Status DumpToDirectory(const string& directory_path, string filename, const SessionModule& session_module); + // Dump hlo_session to directory_path/filename. + static Status DumpToDirectory(const string& directory_path, string filename, + const HloSession& hlo_session); + protected: mutable tensorflow::mutex mutex_; -- GitLab From b2f786867dca85b6b848f09f2c1d40dd123fc0fc Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 20 Apr 2018 15:38:06 -0700 Subject: [PATCH 102/434] Always use the local worker name in CreateWorkerSession when not doing ClusterSpec propagation. Previously, the master would send a job name and task index in an otherwise-empty ServerDef, and the worker would unquestioningly use those to build its worker name. 
However, this would lead to errors if the worker had a local name like "/job:worker/replica:1/task:0", because the ServerDef doesn't support non-zero replica IDs, and so the local worker would end up an inconsistent view of what its worker name should be. In particular `WorkerSession::worker_name` would disagree with the device names added during graph partitioning by the master, which would lead to runtime failures ("InvalidArgumentError: Invalid rendezvous key"). PiperOrigin-RevId: 193733855 --- tensorflow/core/distributed_runtime/BUILD | 1 + .../distributed_runtime/master_session.cc | 28 +++++++++--------- .../core/distributed_runtime/session_mgr.cc | 6 ++-- .../distributed_runtime/session_mgr_test.cc | 29 +++++++++++++++++++ 4 files changed, 49 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD index d564727da5..343dd5d456 100644 --- a/tensorflow/core/distributed_runtime/BUILD +++ b/tensorflow/core/distributed_runtime/BUILD @@ -145,6 +145,7 @@ tf_cc_test( deps = [ ":session_mgr", ":worker_env", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/distributed_runtime/rpc:rpc_rendezvous_mgr", diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index ebe350d313..e3022f38a2 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -1219,17 +1219,6 @@ Status MasterSession::CreateWorkerSessions( workers[i].name = &worker_names[i]; workers[i].worker = worker_cache->CreateWorker(worker_names[i]); workers[i].request.set_session_handle(handle_); - if (options.cluster_def) { - *workers[i].request.mutable_server_def()->mutable_cluster() = - *options.cluster_def; - workers[i].request.mutable_server_def()->set_protocol(*options.protocol); - // Session state is always isolated when ClusterSpec 
propagation - // is in use. - workers[i].request.set_isolate_session_state(true); - } else { - workers[i].request.set_isolate_session_state( - session_opts_.config.isolate_session_state()); - } DeviceNameUtils::ParsedName name; if (!DeviceNameUtils::ParseFullName(worker_names[i], &name)) { @@ -1243,8 +1232,21 @@ Status MasterSession::CreateWorkerSessions( return status; } - workers[i].request.mutable_server_def()->set_job_name(name.job); - workers[i].request.mutable_server_def()->set_task_index(name.task); + if (options.cluster_def) { + *workers[i].request.mutable_server_def()->mutable_cluster() = + *options.cluster_def; + workers[i].request.mutable_server_def()->set_protocol(*options.protocol); + workers[i].request.mutable_server_def()->set_job_name(name.job); + workers[i].request.mutable_server_def()->set_task_index(name.task); + // Session state is always isolated when ClusterSpec propagation + // is in use. + workers[i].request.set_isolate_session_state(true); + } else { + // NOTE(mrry): Do not set any component of the ServerDef, + // because the worker will use its local configuration. 
+ workers[i].request.set_isolate_session_state( + session_opts_.config.isolate_session_state()); + } } for (size_t i = 0; i < worker_names.size(); ++i) { diff --git a/tensorflow/core/distributed_runtime/session_mgr.cc b/tensorflow/core/distributed_runtime/session_mgr.cc index 357e9f8930..7ef4206c78 100644 --- a/tensorflow/core/distributed_runtime/session_mgr.cc +++ b/tensorflow/core/distributed_runtime/session_mgr.cc @@ -43,6 +43,7 @@ SessionMgr::SessionMgr( new GraphMgr(worker_env, worker_env->device_mgr)))), worker_cache_factory_(std::move(worker_cache_factory)) {} +/* static */ string SessionMgr::WorkerNameFromServerDef(const ServerDef& server_def) { return strings::StrCat("/job:", server_def.job_name(), "/replica:0/task:", server_def.task_index()); @@ -56,13 +57,14 @@ Status SessionMgr::CreateSession(const string& session, return errors::InvalidArgument("Session must be non-empty."); } - const string worker_name = WorkerNameFromServerDef(server_def); - WorkerCacheInterface* worker_cache = nullptr; + string worker_name; if (server_def.cluster().job().empty()) { worker_cache = new WorkerCacheWrapper(default_worker_cache_.get()); + worker_name = legacy_session_->worker_name; } else { TF_RETURN_IF_ERROR(worker_cache_factory_(server_def, &worker_cache)); + worker_name = WorkerNameFromServerDef(server_def); } if (worker_cache != nullptr & default_worker_cache_.get() != nullptr) { diff --git a/tensorflow/core/distributed_runtime/session_mgr_test.cc b/tensorflow/core/distributed_runtime/session_mgr_test.cc index 0da333833a..99192119a6 100644 --- a/tensorflow/core/distributed_runtime/session_mgr_test.cc +++ b/tensorflow/core/distributed_runtime/session_mgr_test.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "tensorflow/core/distributed_runtime/worker_env.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/cluster.pb.h" namespace tensorflow { @@ -77,6 +78,34 @@ TEST_F(SessionMgrTest, CreateSessionSimple) { TF_EXPECT_OK(mgr_.DeleteSession(session_handle)); } +TEST_F(SessionMgrTest, CreateSessionClusterDefWorkerName) { + ServerDef server_def; + server_def.set_job_name("worker"); + server_def.set_task_index(3); + auto job = server_def.mutable_cluster()->add_job(); + job->set_name("worker"); + job->mutable_tasks()->insert({3, "localhost:3333"}); + + string session_handle = "test_session_handle"; + TF_EXPECT_OK(mgr_.CreateSession(session_handle, server_def, true)); + std::shared_ptr session; + TF_EXPECT_OK(mgr_.WorkerSessionForSession(session_handle, &session)); + EXPECT_NE(nullptr, session) << "Session for " << session_handle << "was null"; + EXPECT_EQ("/job:worker/replica:0/task:3", session->worker_name); + TF_EXPECT_OK(mgr_.DeleteSession(session_handle)); +} + +TEST_F(SessionMgrTest, CreateSessionDefaultWorkerName) { + ServerDef server_def; + string session_handle = "test_session_handle"; + TF_EXPECT_OK(mgr_.CreateSession(session_handle, server_def, true)); + std::shared_ptr session; + TF_EXPECT_OK(mgr_.WorkerSessionForSession(session_handle, &session)); + EXPECT_NE(nullptr, session) << "Session for " << session_handle << "was null"; + EXPECT_EQ("/job:mnist/replica:0/task:0", session->worker_name); + TF_EXPECT_OK(mgr_.DeleteSession(session_handle)); +} + TEST_F(SessionMgrTest, CreateSessionIsolateSessionState) { ServerDef server_def; server_def.set_job_name("worker"); -- GitLab From c015a45646029f8c116028505f2da9e023b5c2b7 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 20 Apr 2018 15:51:16 -0700 Subject: [PATCH 103/434] Support legacy clusters PiperOrigin-RevId: 193735742 --- .../cluster_resolver/python/training/tpu_cluster_resolver.py | 2 +- 
.../python/training/tpu_cluster_resolver_test.py | 3 +-- tensorflow/contrib/tpu/python/tpu/tpu_config.py | 5 +++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index 5a2771229d..1403483d28 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -245,7 +245,7 @@ class TPUClusterResolver(ClusterResolver): else: if not self._tpu.startswith(compat.as_bytes('grpc://')): # Case 3. - return server_lib.ClusterSpec({}) + return None # Case 2. cluster_spec = {self._job_name: [self._tpu[len( compat.as_bytes('grpc://')):]]} diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index dff7a03b68..5b3f9be5a1 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -356,8 +356,7 @@ class TPUClusterResolverTest(test.TestCase): tpu_cluster_resolver = TPUClusterResolver(tpu='/bns/foo/bar') self.assertEqual( compat.as_bytes('/bns/foo/bar'), tpu_cluster_resolver.master()) - self.assertEqual( - server_lib.ClusterSpec({}), tpu_cluster_resolver.cluster_spec()) + self.assertEqual(None, tpu_cluster_resolver.cluster_spec()) def testGkeEnvironment(self): os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] = 'grpc://10.120.27.5:8470' diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index cc1a7fd801..6d7331e3c7 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -210,8 +210,9 @@ class RunConfig(run_config_lib.RunConfig): raise ValueError( 'You 
cannot provide a ClusterResolver and ' 'session_config.cluster_def.') - self._session_config.cluster_def.CopyFrom( - self._cluster_spec.as_cluster_def()) + if self._cluster_spec: + self._session_config.cluster_def.CopyFrom( + self._cluster_spec.as_cluster_def()) @property def evaluation_master(self): -- GitLab From a0071844d0af47f22ab512363b56383acf762dff Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 16:05:47 -0700 Subject: [PATCH 104/434] Remove protected data members from GraphOptimizerStage. PiperOrigin-RevId: 193737654 --- .../optimizers/arithmetic_optimizer.cc | 54 +++++++++---------- .../optimizers/graph_optimizer_stage.h | 5 +- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 232132e1e8..ed199c1ac8 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -294,8 +294,8 @@ class ArithmeticOptimizerStage : public GraphOptimizerStage { for (int i = src->input_size() - 1; i >= 0; --i) { if (IsControlInput(src->input(i))) { *target_node->add_input() = src->input(i); - ctx_.node_map->AddOutput(NodeName(src->input(i)), - target_node->name()); + ctx().node_map->AddOutput(NodeName(src->input(i)), + target_node->name()); } else { break; } @@ -442,7 +442,7 @@ class ArithmeticNodesGroupOptimizerStage : public ArithmeticOptimizerStage { // TODO(ezhulenev): move to GraphOptimizerStage? 
bool DrivesControlDependency(const NodeDef& node) const { int position; - for (const NodeDef* output : ctx_.node_map->GetOutputs(node.name())) { + for (const NodeDef* output : ctx().node_map->GetOutputs(node.name())) { for (int i = 0; i < output->input_size(); ++i) { auto input = output->input(i); string name = ParseNodeName(input, &position); @@ -476,8 +476,8 @@ class ArithmeticNodesGroupOptimizerStage : public ArithmeticOptimizerStage { } bool IsInPreserveSet(const NodeDef& node) const { - return ctx_.nodes_to_preserve->find(node.name()) != - ctx_.nodes_to_preserve->end(); + return ctx().nodes_to_preserve->find(node.name()) != + ctx().nodes_to_preserve->end(); } bool IsAlreadyOptimized(const NodeDef& node) const { @@ -546,7 +546,7 @@ class AddOpsRewriteStage : public ArithmeticNodesGroupOptimizerStage { // with a single output data consumer (presumably if we reach this node from // previously absorbed or a root node, it means that this node is not used // as an input to any other op, outside of the group) - if (NumNonControlDataOutputs(node, *ctx_.node_map) != 1) { + if (NumNonControlDataOutputs(node, *ctx().node_map) != 1) { return false; } // All input shapes must be broadcastable to the node shape @@ -685,7 +685,7 @@ class AddOpsRewriteStage : public ArithmeticNodesGroupOptimizerStage { (*node->mutable_attr())["N"].set_i(inputs.size()); for (const auto& inputAndShape : inputs) { - ctx_.node_map->AddOutput(inputAndShape.input, node_name); + ctx().node_map->AddOutput(inputAndShape.input, node_name); node->add_input(inputAndShape.input); } @@ -707,8 +707,8 @@ class AddOpsRewriteStage : public ArithmeticNodesGroupOptimizerStage { node->set_device(root_node.device()); (*node->mutable_attr())["T"].set_type(dtype); - ctx_.node_map->AddOutput(left.input, node_name); - ctx_.node_map->AddOutput(right.input, node_name); + ctx().node_map->AddOutput(left.input, node_name); + ctx().node_map->AddOutput(right.input, node_name); node->add_input(left.input); 
node->add_input(right.input); @@ -784,20 +784,20 @@ class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { new_outer_node->set_input(1, new_add_node->name()); } - ctx_.node_map->AddOutput(common_factor, new_outer_node->name()); - ctx_.node_map->AddOutput(new_add_node->name(), new_outer_node->name()); + ctx().node_map->AddOutput(common_factor, new_outer_node->name()); + ctx().node_map->AddOutput(new_add_node->name(), new_outer_node->name()); // Hoist non-shared factors up into the new AddN node. for (int i = 0; i < unique_factors.size(); ++i) { const string& unique_factor_i = unique_factors[i]; new_add_node->set_input(i, unique_factor_i); - ctx_.node_map->AddOutput(unique_factor_i, new_add_node->name()); + ctx().node_map->AddOutput(unique_factor_i, new_add_node->name()); } // Add control deps on add node for (const string& ctrl_dep : ctrl_deps) { *new_add_node->add_input() = ctrl_dep; - ctx_.node_map->AddOutput(NodeName(ctrl_dep), new_add_node->name()); + ctx().node_map->AddOutput(NodeName(ctrl_dep), new_add_node->name()); } // optimize new inner aggregation node @@ -931,8 +931,8 @@ class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { // if graph rewrite happens in multiple passes without graph pruning between // them, it's possible that rewritten node already exists in a graph return rewritten_nodes_.find(node->name()) != rewritten_nodes_.end() || - ctx_.node_map->NodeExists(OuterNodeName(node, false)) || - ctx_.node_map->NodeExists(OuterNodeName(node, true)); + ctx().node_map->NodeExists(OuterNodeName(node, false)) || + ctx().node_map->NodeExists(OuterNodeName(node, true)); } // keep names of the nodes that were optimized by this stage @@ -996,7 +996,7 @@ class MinimizeBroadcasts : public ArithmeticNodesGroupOptimizerStage { } // Optimized nodes updated in place, and that would break the graph, if the // node has multiple output consumers - if (NumNonControlOutputs(node, *ctx_.node_map) != 1) { + if 
(NumNonControlOutputs(node, *ctx().node_map) != 1) { return false; } // All input shapes must be broadcastable to the node shape @@ -1120,13 +1120,13 @@ class MinimizeBroadcasts : public ArithmeticNodesGroupOptimizerStage { node->set_input(0, input_0); node->set_input(1, input_1); // Invalidate node properties (shape) - ctx_.graph_properties->ClearOutputProperties(node->name()); - ctx_.graph_properties->ClearInputProperties(node->name()); + ctx().graph_properties->ClearOutputProperties(node->name()); + ctx().graph_properties->ClearInputProperties(node->name()); // Update the node map - ctx_.node_map->RemoveOutput(NodeName(old_input_0), node->name()); - ctx_.node_map->RemoveOutput(NodeName(old_input_1), node->name()); - ctx_.node_map->AddOutput(NodeName(input_0), node->name()); - ctx_.node_map->AddOutput(NodeName(input_1), node->name()); + ctx().node_map->RemoveOutput(NodeName(old_input_0), node->name()); + ctx().node_map->RemoveOutput(NodeName(old_input_1), node->name()); + ctx().node_map->AddOutput(NodeName(input_0), node->name()); + ctx().node_map->AddOutput(NodeName(input_1), node->name()); // Add updated node to optimization queue AddToOptimizationQueue(node); } @@ -1257,8 +1257,8 @@ class RemoveRedundantBitcastStage : public ArithmeticOptimizerStage { // Bitcast(Bitcast(x, type1), type2) => Bitcast(x, type2) bitcast->set_input(0, operand->input(0)); SetSourceDataType(GetSourceDataType(*operand), bitcast); - ctx_.node_map->UpdateInput(bitcast->name(), bitcast->input(0), - operand->input(0)); + ctx().node_map->UpdateInput(bitcast->name(), bitcast->input(0), + operand->input(0)); AddToOptimizationQueue(bitcast); *simplified_node_name = bitcast->name(); } @@ -1313,14 +1313,14 @@ class RemoveNegationStage : public ArithmeticOptimizerStage { node->mutable_input()->SwapElements(0, 1); node->set_input(1, x->input(0)); node->add_input(AsControlDependency(x->name())); - ctx_.node_map->AddOutput(NodeName(x->input(0)), node_name); + 
ctx().node_map->AddOutput(NodeName(x->input(0)), node_name); updated = true; } else if (IsNeg(*y)) { // a + (-b) = a - b node->set_op("Sub"); node->set_input(1, y->input(0)); node->add_input(AsControlDependency(y->name())); - ctx_.node_map->AddOutput(NodeName(y->input(0)), node_name); + ctx().node_map->AddOutput(NodeName(y->input(0)), node_name); updated = true; } } else if (IsSub(*node)) { @@ -1329,7 +1329,7 @@ class RemoveNegationStage : public ArithmeticOptimizerStage { node->set_op("Add"); node->set_input(1, y->input(0)); node->add_input(AsControlDependency(y->name())); - ctx_.node_map->AddOutput(NodeName(y->input(0)), node_name); + ctx().node_map->AddOutput(NodeName(y->input(0)), node_name); updated = true; } } diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h index ed398525f3..089cad36e9 100644 --- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h @@ -182,7 +182,10 @@ class GraphOptimizerStage { return ::tensorflow::grappler::AddEmptyNode(ctx_, name); } - protected: // Data members + protected: + const GraphOptimizerContext& ctx() const { return ctx_; } + + private: // Data members const string optimizer_name_; const string stage_name_; const GraphOptimizerContext ctx_; -- GitLab From cd095e0c455b3df98841ca70ba24fd41935552e7 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 20 Apr 2018 16:18:29 -0700 Subject: [PATCH 105/434] tf.contrib.data.scan: Support eager execution. 
PiperOrigin-RevId: 193739234 --- .../contrib/data/python/kernel_tests/BUILD | 1 + .../kernel_tests/scan_dataset_op_test.py | 23 ++++++++++++------- .../contrib/data/python/ops/scan_ops.py | 1 + 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 05a4f5028a..9d1e8b20c2 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -343,6 +343,7 @@ py_test( "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/eager:context", "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py index e0494736b7..1a97a84b2c 100644 --- a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py @@ -24,9 +24,11 @@ import numpy as np from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import scan_ops from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -57,19 +59,24 @@ class ScanDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + @test_util.run_in_graph_and_eager_modes() def testFibonacci(self): iterator = dataset_ops.Dataset.from_tensors(1).repeat(None).apply( scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1])) ).make_one_shot_iterator() - next_element = 
iterator.get_next() - with self.test_session() as sess: - self.assertEqual(1, sess.run(next_element)) - self.assertEqual(1, sess.run(next_element)) - self.assertEqual(2, sess.run(next_element)) - self.assertEqual(3, sess.run(next_element)) - self.assertEqual(5, sess.run(next_element)) - self.assertEqual(8, sess.run(next_element)) + if context.executing_eagerly(): + next_element = iterator.get_next + else: + get_next = iterator.get_next() + next_element = lambda: get_next + + self.assertEqual(1, self.evaluate(next_element())) + self.assertEqual(1, self.evaluate(next_element())) + self.assertEqual(2, self.evaluate(next_element())) + self.assertEqual(3, self.evaluate(next_element())) + self.assertEqual(5, self.evaluate(next_element())) + self.assertEqual(8, self.evaluate(next_element())) def testChangingStateShape(self): # Test the fixed-point shape invariant calculations: start with diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 1c88366273..711a538697 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -144,6 +144,7 @@ class _ScanDataset(dataset_ops.Dataset): weakened_state_shapes) self._scan_func = tf_scan_func + self._scan_func.add_to_graph(ops.get_default_graph()) def _as_variant_tensor(self): input_t = self._input_dataset._as_variant_tensor() # pylint: disable=protected-access -- GitLab From 8d3a41f459b776856ff668bb076d4bc449927e09 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Fri, 20 Apr 2018 16:30:02 -0700 Subject: [PATCH 106/434] [XLA] Remove constant cast in literal util. 
It's not portable to modify an underlying char array of a c++ string object: (https://stackoverflow.com/questions/5729203/modifying-underlying-char-array-of-a-c-string-object) RELNOTES: n/a PiperOrigin-RevId: 193740595 --- tensorflow/compiler/xla/literal_util.cc | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index c315b4ff30..bb6dd4f909 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -44,8 +44,16 @@ namespace { constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; -// Converts between little and big endian, assuming elements in the array are 16 -// bits long. +// Converts between little and big endian. +// +// Precondition: size % 2 == 0 (elements in the array are 16 bits long) +void ConvertEndianShort(string* bytes) { + CHECK_EQ(bytes->size() / 2, 0); + for (int64 i = 0; i < bytes->size(); i += 2) { + std::swap((*bytes)[i], (*bytes)[i + 1]); + } +} + void ConvertEndianShort(char* bytes, int64 size) { CHECK_EQ(size / 2, 0); for (int64 i = 0; i < size; i += 2) { @@ -1930,16 +1938,14 @@ void Literal::Piece::WriteToProto(LiteralProto* proto) const { *proto->mutable_f16s() = string( reinterpret_cast(data().data()), size_bytes()); if (!kLittleEndian) { - ConvertEndianShort(const_cast(proto->mutable_f16s()->data()), - proto->f16s().size()); + ConvertEndianShort(proto->mutable_f16s()); } break; case BF16: *proto->mutable_bf16s() = string( reinterpret_cast(data().data()), size_bytes()); if (!kLittleEndian) { - ConvertEndianShort(const_cast(proto->mutable_bf16s()->data()), - proto->bf16s().size()); + ConvertEndianShort(proto->mutable_bf16s()); } break; case F32: -- GitLab From 16f0a5bb2aed8d0e605004b421a9cd6f32e37f94 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 20 Apr 2018 16:48:44 -0700 Subject: [PATCH 107/434] Java: Bump release to 1.8.0-rc1 PiperOrigin-RevId: 193742798 
--- tensorflow/java/maven/libtensorflow/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml | 2 +- tensorflow/java/maven/pom.xml | 2 +- tensorflow/java/maven/proto/pom.xml | 2 +- tensorflow/java/maven/tensorflow/pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml index 9c1601753b..66985e3b18 100644 --- a/tensorflow/java/maven/libtensorflow/pom.xml +++ b/tensorflow/java/maven/libtensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.8.0-rc0 + 1.8.0-rc1 ../ libtensorflow diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml index 3d013e12b0..34d4ba0b08 100644 --- a/tensorflow/java/maven/libtensorflow_jni/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.8.0-rc0 + 1.8.0-rc1 ../ libtensorflow_jni diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml index 40e44af1f5..1909d08e41 100644 --- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.8.0-rc0 + 1.8.0-rc1 ../ libtensorflow_jni_gpu diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 82bfd0c73a..ba98732f5a 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.8.0-rc0 + 1.8.0-rc1 pom https://www.tensorflow.org diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml index 0a2775a500..dee8c34359 100644 --- a/tensorflow/java/maven/proto/pom.xml +++ b/tensorflow/java/maven/proto/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.8.0-rc0 + 1.8.0-rc1 ../ proto diff --git a/tensorflow/java/maven/tensorflow/pom.xml 
b/tensorflow/java/maven/tensorflow/pom.xml index 61961432a7..95e024ace9 100644 --- a/tensorflow/java/maven/tensorflow/pom.xml +++ b/tensorflow/java/maven/tensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.8.0-rc0 + 1.8.0-rc1 ../ tensorflow -- GitLab From 0385bfe0726ad9710bfcca145e19611e9e2391bb Mon Sep 17 00:00:00 2001 From: Mustafa Ispir Date: Fri, 20 Apr 2018 17:03:14 -0700 Subject: [PATCH 108/434] Let estimators to be used when eager is enabled. PiperOrigin-RevId: 193744371 --- tensorflow/python/estimator/estimator.py | 283 +++++++++--------- tensorflow/python/estimator/estimator_test.py | 1 + 2 files changed, 143 insertions(+), 141 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 9862fdecdb..351fcb6423 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -100,10 +100,6 @@ class Estimator(object): None of `Estimator`'s methods can be overridden in subclasses (its constructor enforces this). Subclasses should use `model_fn` to configure the base class, and may add methods implementing specialized functionality. - - @compatibility(eager) - Estimators are not compatible with eager execution. - @end_compatibility """ def __init__(self, model_fn, model_dir=None, config=None, params=None, @@ -166,15 +162,10 @@ class Estimator(object): vocabularies and Tensor names are unchanged. Raises: - RuntimeError: If eager execution is enabled. ValueError: parameters of `model_fn` don't match `params`. ValueError: if this is called via a subclass and if that class overrides a member of `Estimator`. """ - if context.executing_eagerly(): - raise RuntimeError( - 'Estimators are not supported when eager execution is enabled.') - Estimator._assert_members_are_not_overridden(self) if config is None: @@ -269,7 +260,8 @@ class Estimator(object): ValueError: If the Estimator has not produced a checkpoint yet. 
""" _check_checkpoint_available(self.model_dir) - return training.load_variable(self.model_dir, name) + with context.graph_mode(): + return training.load_variable(self.model_dir, name) def get_variable_names(self): """Returns list of all variable names in this model. @@ -281,7 +273,8 @@ class Estimator(object): ValueError: If the Estimator has not produced a checkpoint yet. """ _check_checkpoint_available(self.model_dir) - return [name for name, _ in training.list_variables(self.model_dir)] + with context.graph_mode(): + return [name for name, _ in training.list_variables(self.model_dir)] def latest_checkpoint(self): """Finds the filename of latest saved checkpoint file in `model_dir`. @@ -290,7 +283,8 @@ class Estimator(object): The full path to the latest checkpoint or `None` if no checkpoint was found. """ - return saver.latest_checkpoint(self.model_dir) + with context.graph_mode(): + return saver.latest_checkpoint(self.model_dir) def train(self, input_fn, @@ -342,27 +336,28 @@ class Estimator(object): ValueError: If both `steps` and `max_steps` are not `None`. ValueError: If either `steps` or `max_steps` is <= 0. 
""" - if (steps is not None) and (max_steps is not None): - raise ValueError('Can not provide both steps and max_steps.') - if steps is not None and steps <= 0: - raise ValueError('Must specify steps > 0, given: {}'.format(steps)) - if max_steps is not None and max_steps <= 0: - raise ValueError( - 'Must specify max_steps > 0, given: {}'.format(max_steps)) + with context.graph_mode(): + if (steps is not None) and (max_steps is not None): + raise ValueError('Can not provide both steps and max_steps.') + if steps is not None and steps <= 0: + raise ValueError('Must specify steps > 0, given: {}'.format(steps)) + if max_steps is not None and max_steps <= 0: + raise ValueError( + 'Must specify max_steps > 0, given: {}'.format(max_steps)) - if max_steps is not None: - start_step = _load_global_step_from_checkpoint_dir(self._model_dir) - if max_steps <= start_step: - logging.info('Skipping training since max_steps has already saved.') - return self + if max_steps is not None: + start_step = _load_global_step_from_checkpoint_dir(self._model_dir) + if max_steps <= start_step: + logging.info('Skipping training since max_steps has already saved.') + return self - hooks = _check_hooks_type(hooks) - hooks.extend(self._convert_train_steps_to_hooks(steps, max_steps)) + hooks = _check_hooks_type(hooks) + hooks.extend(self._convert_train_steps_to_hooks(steps, max_steps)) - saving_listeners = _check_listeners_type(saving_listeners) - loss = self._train_model(input_fn, hooks, saving_listeners) - logging.info('Loss for final step: %s.', loss) - return self + saving_listeners = _check_listeners_type(saving_listeners) + loss = self._train_model(input_fn, hooks, saving_listeners) + logging.info('Loss for final step: %s.', loss) + return self def _convert_train_steps_to_hooks(self, steps, max_steps): if steps is not None or max_steps is not None: @@ -415,14 +410,15 @@ class Estimator(object): ValueError: If no model has been trained, namely `model_dir`, or the given `checkpoint_path` is 
empty. """ - hooks = _check_hooks_type(hooks) - hooks.extend(self._convert_eval_steps_to_hooks(steps)) + with context.graph_mode(): + hooks = _check_hooks_type(hooks) + hooks.extend(self._convert_eval_steps_to_hooks(steps)) - return self._evaluate_model( - input_fn=input_fn, - hooks=hooks, - checkpoint_path=checkpoint_path, - name=name) + return self._evaluate_model( + input_fn=input_fn, + hooks=hooks, + checkpoint_path=checkpoint_path, + name=name) def _convert_eval_steps_to_hooks(self, steps): if steps is None: @@ -479,45 +475,48 @@ class Estimator(object): `predictions`. For example if `predict_keys` is not `None` but `EstimatorSpec.predictions` is not a `dict`. """ - hooks = _check_hooks_type(hooks) - # Check that model has been trained. - if not checkpoint_path: - checkpoint_path = saver.latest_checkpoint(self._model_dir) - if not checkpoint_path: - raise ValueError('Could not find trained model in model_dir: {}.'.format( - self._model_dir)) + with context.graph_mode(): + hooks = _check_hooks_type(hooks) + # Check that model has been trained. 
+ if not checkpoint_path: + checkpoint_path = saver.latest_checkpoint(self._model_dir) + if not checkpoint_path: + raise ValueError( + 'Could not find trained model in model_dir: {}.'.format( + self._model_dir)) - with ops.Graph().as_default() as g: - random_seed.set_random_seed(self._config.tf_random_seed) - self._create_and_assert_global_step(g) - features, input_hooks = self._get_features_from_input_fn( - input_fn, model_fn_lib.ModeKeys.PREDICT) - estimator_spec = self._call_model_fn( - features, None, model_fn_lib.ModeKeys.PREDICT, self.config) - predictions = self._extract_keys(estimator_spec.predictions, predict_keys) - all_hooks = list(input_hooks) - all_hooks.extend(hooks) - all_hooks.extend(list(estimator_spec.prediction_hooks or [])) - with training.MonitoredSession( - session_creator=training.ChiefSessionCreator( - checkpoint_filename_with_path=checkpoint_path, - master=self._config.master, - scaffold=estimator_spec.scaffold, - config=self._session_config), - hooks=all_hooks) as mon_sess: - while not mon_sess.should_stop(): - preds_evaluated = mon_sess.run(predictions) - if not yield_single_examples: - yield preds_evaluated - elif not isinstance(predictions, dict): - for pred in preds_evaluated: - yield pred - else: - for i in range(self._extract_batch_length(preds_evaluated)): - yield { - key: value[i] - for key, value in six.iteritems(preds_evaluated) - } + with ops.Graph().as_default() as g: + random_seed.set_random_seed(self._config.tf_random_seed) + self._create_and_assert_global_step(g) + features, input_hooks = self._get_features_from_input_fn( + input_fn, model_fn_lib.ModeKeys.PREDICT) + estimator_spec = self._call_model_fn( + features, None, model_fn_lib.ModeKeys.PREDICT, self.config) + predictions = self._extract_keys( + estimator_spec.predictions, predict_keys) + all_hooks = list(input_hooks) + all_hooks.extend(hooks) + all_hooks.extend(list(estimator_spec.prediction_hooks or [])) + with training.MonitoredSession( + 
session_creator=training.ChiefSessionCreator( + checkpoint_filename_with_path=checkpoint_path, + master=self._config.master, + scaffold=estimator_spec.scaffold, + config=self._session_config), + hooks=all_hooks) as mon_sess: + while not mon_sess.should_stop(): + preds_evaluated = mon_sess.run(predictions) + if not yield_single_examples: + yield preds_evaluated + elif not isinstance(predictions, dict): + for pred in preds_evaluated: + yield pred + else: + for i in range(self._extract_batch_length(preds_evaluated)): + yield { + key: value[i] + for key, value in six.iteritems(preds_evaluated) + } def _assert_members_are_not_overridden(self): """Asserts members of `Estimator` are not overridden.""" @@ -597,73 +596,75 @@ class Estimator(object): are provided, or no checkpoint can be found. """ # pylint: enable=line-too-long - if serving_input_receiver_fn is None: - raise ValueError('serving_input_receiver_fn must be defined.') - - with ops.Graph().as_default() as g: - self._create_and_assert_global_step(g) - random_seed.set_random_seed(self._config.tf_random_seed) - serving_input_receiver = serving_input_receiver_fn() + with context.graph_mode(): + if serving_input_receiver_fn is None: + raise ValueError('serving_input_receiver_fn must be defined.') - # Call the model_fn and collect the export_outputs. - estimator_spec = self._call_model_fn( - features=serving_input_receiver.features, - labels=None, - mode=model_fn_lib.ModeKeys.PREDICT, - config=self.config) - - # Build the SignatureDefs from receivers and all outputs - signature_def_map = build_all_signature_defs( - serving_input_receiver.receiver_tensors, - estimator_spec.export_outputs, - serving_input_receiver.receiver_tensors_alternatives) - - if not checkpoint_path: - # Locate the latest checkpoint - checkpoint_path = saver.latest_checkpoint(self._model_dir) - if not checkpoint_path: - raise ValueError("Couldn't find trained model at %s." 
% self._model_dir) - - export_dir = get_timestamped_export_dir(export_dir_base) - temp_export_dir = get_temp_export_dir(export_dir) - - # TODO(soergel): Consider whether MonitoredSession makes sense here - with tf_session.Session(config=self._session_config) as session: - - saver_for_restore = estimator_spec.scaffold.saver or saver.Saver( - sharded=True) - saver_for_restore.restore(session, checkpoint_path) - - # pylint: disable=protected-access - local_init_op = ( - estimator_spec.scaffold.local_init_op or - monitored_session.Scaffold._default_local_init_op()) - # pylint: enable=protected-access - - # Perform the export - builder = saved_model_builder.SavedModelBuilder(temp_export_dir) - builder.add_meta_graph_and_variables( - session, [tag_constants.SERVING], - signature_def_map=signature_def_map, - assets_collection=ops.get_collection( - ops.GraphKeys.ASSET_FILEPATHS), - legacy_init_op=local_init_op, - strip_default_attrs=strip_default_attrs) - builder.save(as_text) - - # Add the extra assets - if assets_extra: - assets_extra_path = os.path.join(compat.as_bytes(temp_export_dir), - compat.as_bytes('assets.extra')) - for dest_relative, source in assets_extra.items(): - dest_absolute = os.path.join(compat.as_bytes(assets_extra_path), - compat.as_bytes(dest_relative)) - dest_path = os.path.dirname(dest_absolute) - gfile.MakeDirs(dest_path) - gfile.Copy(source, dest_absolute) - - gfile.Rename(temp_export_dir, export_dir) - return export_dir + with ops.Graph().as_default() as g: + self._create_and_assert_global_step(g) + random_seed.set_random_seed(self._config.tf_random_seed) + serving_input_receiver = serving_input_receiver_fn() + + # Call the model_fn and collect the export_outputs. 
+ estimator_spec = self._call_model_fn( + features=serving_input_receiver.features, + labels=None, + mode=model_fn_lib.ModeKeys.PREDICT, + config=self.config) + + # Build the SignatureDefs from receivers and all outputs + signature_def_map = build_all_signature_defs( + serving_input_receiver.receiver_tensors, + estimator_spec.export_outputs, + serving_input_receiver.receiver_tensors_alternatives) + + if not checkpoint_path: + # Locate the latest checkpoint + checkpoint_path = saver.latest_checkpoint(self._model_dir) + if not checkpoint_path: + raise ValueError( + "Couldn't find trained model at %s." % self._model_dir) + + export_dir = get_timestamped_export_dir(export_dir_base) + temp_export_dir = get_temp_export_dir(export_dir) + + # TODO(soergel): Consider whether MonitoredSession makes sense here + with tf_session.Session(config=self._session_config) as session: + + saver_for_restore = estimator_spec.scaffold.saver or saver.Saver( + sharded=True) + saver_for_restore.restore(session, checkpoint_path) + + # pylint: disable=protected-access + local_init_op = ( + estimator_spec.scaffold.local_init_op or + monitored_session.Scaffold._default_local_init_op()) + # pylint: enable=protected-access + + # Perform the export + builder = saved_model_builder.SavedModelBuilder(temp_export_dir) + builder.add_meta_graph_and_variables( + session, [tag_constants.SERVING], + signature_def_map=signature_def_map, + assets_collection=ops.get_collection( + ops.GraphKeys.ASSET_FILEPATHS), + legacy_init_op=local_init_op, + strip_default_attrs=strip_default_attrs) + builder.save(as_text) + + # Add the extra assets + if assets_extra: + assets_extra_path = os.path.join(compat.as_bytes(temp_export_dir), + compat.as_bytes('assets.extra')) + for dest_relative, source in assets_extra.items(): + dest_absolute = os.path.join(compat.as_bytes(assets_extra_path), + compat.as_bytes(dest_relative)) + dest_path = os.path.dirname(dest_absolute) + gfile.MakeDirs(dest_path) + gfile.Copy(source, 
dest_absolute) + + gfile.Rename(temp_export_dir, export_dir) + return export_dir def _get_features_from_input_fn(self, input_fn, mode): """Extracts the `features` from return values of `input_fn`.""" diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index f4255091bf..d453e19357 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -2287,6 +2287,7 @@ class EstimatorHookOrderingTest(test.TestCase): class EstimatorIntegrationTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def test_complete_flow_with_a_simple_linear_model(self): def _model_fn(features, labels, mode): -- GitLab From 2591a66ab804b73f55c1c7a0b105744f94d8a02e Mon Sep 17 00:00:00 2001 From: Russell Power Date: Fri, 20 Apr 2018 17:55:01 -0700 Subject: [PATCH 109/434] Automated g4 rollback of changelist 193717076 PiperOrigin-RevId: 193749007 --- tensorflow/contrib/tpu/BUILD | 1 + .../contrib/tpu/python/tpu/keras_support.py | 391 ++++++++++++++++++ 2 files changed, 392 insertions(+) create mode 100644 tensorflow/contrib/tpu/python/tpu/keras_support.py diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 9646d15486..eac210418b 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -162,6 +162,7 @@ py_library( "python/tpu/__init__.py", "python/tpu/bfloat16.py", "python/tpu/device_assignment.py", + "python/tpu/keras_support.py", "python/tpu/topology.py", "python/tpu/tpu.py", "python/tpu/tpu_feed.py", diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py new file mode 100644 index 0000000000..e86ca0a1d8 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py @@ -0,0 +1,391 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""*Experimental* support for running Keras models on the TPU. + +To use, wrap your model with the `keras_support.tpu_model` function. + +Example usage: + +``` +# Must activate before building TPU models +keras_support.setup_tpu_session(master_address) + +image = tf.keras.layers.Input(shape=(28, 28, 3), name='image') +c1 = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3))( image) +flattened = tf.keras.layers.Flatten()(c1) +logits = tf.keras.layers.Dense(10, activation='softmax')(flattened) +model = tf.keras.Model(inputs=[image], outputs=[logits]) +model = keras_support.tpu_model(model) + +# Only TF optimizers are currently supported. +model.compile(optimizer=tf.train.AdamOptimizer(), ...) + +# `images` and `labels` should be Numpy arrays. Support for tensor input +# (e.g. datasets) is planned. 
+model.fit(images, labels) + +# Invoke before shutting down +keras_support.shutdown_tpu_session() +``` +""" + +# pylint: disable=protected-access + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import re + +from tensorflow.contrib.framework.python.framework import experimental +from tensorflow.contrib.tpu.python.ops import tpu_ops +from tensorflow.contrib.tpu.python.tpu import tpu +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.client import session as tf_session +from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_spec +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import layers +from tensorflow.python.keras._impl.keras import models +from tensorflow.python.keras._impl.keras import optimizers as keras_optimizers +from tensorflow.python.keras._impl.keras.layers import embeddings +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import training_util + + +class TPUEmbedding(embeddings.Embedding): + """TPU compatible embedding layer. + + The default Keras layer is not TPU compatible. This layer is a drop-in + replacement: it has the same behavior and will work on CPU and GPU devices. 
+ """ + + def __init__(self, *args, **kw): + super(TPUEmbedding, self).__init__(*args, **kw) + + def build(self, input_shape): + if input_shape[0] is None: + raise ValueError( + 'TPUEmbeddings must have a fixed input_length or input shape.') + return super(TPUEmbedding, self).build(input_shape) + + def call(self, inputs): + if K.dtype(inputs) != 'int32': + inputs = math_ops.cast(inputs, 'int32') + + inputs = array_ops.one_hot(inputs, self.input_dim) + return math_ops.tensordot(inputs, self.embeddings, 1) + + +class CompiledTPUOp( + collections.namedtuple( + 'CompiledTPUOp', + ['tpu_execute_op', 'infeed_tensors', 'infeed_op', 'outfeed_op'])): + pass + + +def _valid_name(tensor_name): + """Return a valid tensor name (strips '/', ':', etc).""" + return re.sub('[^a-zA-Z0-9_-]+', '', tensor_name) + + +class TPUFunction(object): + """K.function compatible interface for invoking a TPU compiled function. + + Recompilation is triggered on-demand for each set of new inputs shapes: the + results are cached for future execution. We expect most computations will + be dominated by a standard batch-size, followed by a straggler batch for + the end of training or evaluation. + + All `inputs` and `outputs` will be loaded via the infeed and outfeed queues + instead of being injected as `feed_dict` items or fetches. + """ + + def __init__(self, model, execution_mode): + self.model = model + self.execution_mode = execution_mode + self._compilation_cache = {} + + def _specialize_model(self, input_specs): + """Specialize `self.model` (a Keras model) for the given input shapes.""" + # Re-create our input and output layers inside our subgraph. They will be + # attached to the true computation when we clone our model in `tpu_fn`. 
+ K.set_learning_phase(self.execution_mode == model_fn_lib.ModeKeys.TRAIN) + + # functools.partial and callable objects are not supported by tpu.rewrite + def _model_fn(): + """Compute fit/eval/predict for the TPU.""" + is_training = self.execution_mode == model_fn_lib.ModeKeys.TRAIN + is_test = self.execution_mode == model_fn_lib.ModeKeys.EVAL + is_predict = self.execution_mode == model_fn_lib.ModeKeys.PREDICT + + # During train/eval, we infeed our features as well as labels. + if is_training or is_test: + infeed_layers = self.model._input_layers + self.model._output_layers + else: + infeed_layers = self.model._input_layers + + # Generate our infeed operation to read features & labels. + infeed_tensors = tpu_ops.infeed_dequeue_tuple( + dtypes=[spec.dtype for spec in input_specs], + shapes=[spec.shape for spec in input_specs], + name='infeed-%s' % self.execution_mode) + + assert len(infeed_tensors) == len(infeed_layers), ( + 'Infeed inputs did not match model: %s vs %s', (infeed_layers, + infeed_tensors)) + + tpu_targets = [] + tpu_inputs = [] + + # Sort infeed outputs into inputs and labels for calling our Keras model. + for tensor, layer in zip(infeed_tensors, infeed_layers): + if layer in self.model._input_layers: + tpu_inputs.append(layers.Input(name=layer.name, tensor=tensor)) + if layer in self.model._output_layers: + tpu_targets.append(tensor) + + optimizer = self.model.optimizer + optimizer.iterations = training_util.get_or_create_global_step() + + # Call our model with our infeed inputs (re-using the weights). 
+ model_outputs = self.model(tpu_inputs) + child_model = models.Model(inputs=tpu_inputs, outputs=model_outputs) + if is_training or is_test: + child_model.compile( + optimizer=self.model.optimizer, + loss=self.model.loss, + loss_weights=self.model.loss_weights, + metrics=self.model.metrics, + weighted_metrics=self.model.weighted_metrics, + target_tensors=tpu_targets, + ) + + # Compute our outfeed depending on the execution mode + if is_training: + child_model._make_train_function() + self._outfeed_spec = [ + tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name) + for tensor in child_model.train_function.outputs + ] + return [ + child_model.train_function.updates_op, + tpu_ops.outfeed_enqueue_tuple( + child_model.train_function.outputs, name='oufeed-enqueue-train') + ] + elif is_test: + child_model._make_test_function() + self._outfeed_spec = [ + tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name) + for tensor in child_model.test_function.outputs + ] + return [ + tpu_ops.outfeed_enqueue_tuple( + child_model.test_function.outputs, name='outfeed-enqueue-test') + ] + elif is_predict: + child_model._make_predict_function() + self._outfeed_spec = [ + tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name) + for tensor in child_model.predict_function.outputs + ] + return [ + tpu_ops.outfeed_enqueue_tuple( + child_model.predict_function.outputs, + name='outfeed-enqueue-predict', + ) + ] + else: + assert False, 'Unexpected execution mode: %s' % self.execution_mode + + # Capture outfeed metadata computed during the rewrite. + self._outfeed_spec = None + + tpu_execute_op = tpu.rewrite(_model_fn) + + K._initialize_variables(K.get_session()) # pylint-disable: protected-access + + # Generate CPU side operations to enqueue features/labels and dequeue + # outputs from the model call. 
+ with ops.device('/device:TPU:0'): + infeed_tensors = [] + for spec in input_specs: + infeed_tensors.append( + array_ops.placeholder( + dtype=spec.dtype, + shape=spec.shape, + name='infeed-enqueue-%s' % spec.name)) + + infeed_op = tpu_ops.infeed_enqueue_tuple( + infeed_tensors, [spec.shape for spec in input_specs], + name='infeed-enqueue-%s' % self.execution_mode) + + outfeed_op = tpu_ops.outfeed_dequeue_tuple( + dtypes=[spec.dtype for spec in self._outfeed_spec], + shapes=[spec.shape for spec in self._outfeed_spec], + name='outfeed-dequeue-%s' % self.execution_mode) + + return CompiledTPUOp(tpu_execute_op, infeed_tensors, infeed_op, outfeed_op) + + def __call__(self, inputs): + assert isinstance(inputs, list) + + # Strip sample weight from inputs + if (self.execution_mode == model_fn_lib.ModeKeys.TRAIN or + self.execution_mode == model_fn_lib.ModeKeys.EVAL): + input_tensors = self.model._feed_inputs + self.model._feed_targets + inputs = inputs[:len(input_tensors)] + else: + input_tensors = self.model._feed_inputs + + # Compute an input specification (used to generate infeed enqueue and + # dequeue operations). We use the shape from our input array and the + # dtype from our model. A user may pass in a float64 for a float32 + # input: for model compatibility we still must generate a float32 infeed. + input_specs = [] + for tensor, ary in zip(input_tensors, inputs): + input_specs.append( + tensor_spec.TensorSpec(ary.shape, tensor.dtype, + _valid_name(tensor.name))) + + # XLA requires every operation in the graph has a fixed shape. To + # handle varying batch sizes we recompile a new sub-graph for each + # unique input shape. 
+ shape_key = tuple([tuple(spec.shape.as_list()) for spec in input_specs]) + + if shape_key not in self._compilation_cache: + logging.info('New input shapes; (re-)compiling: mode=%s, %s', + self.execution_mode, input_specs) + self._compilation_cache[shape_key] = self._specialize_model(input_specs) + + compiled_model = self._compilation_cache[shape_key] + + infeed_dict = {} + for tensor, value in zip(compiled_model.infeed_tensors, inputs): + infeed_dict[tensor] = value + + session = K.get_session() + _, _, outfeed_outputs = session.run([ + compiled_model.infeed_op, compiled_model.tpu_execute_op, + compiled_model.outfeed_op + ], infeed_dict) + + return outfeed_outputs + + +@experimental +def setup_tpu_session(master): + """Initializes and returns a Keras/TF session connected the TPU `master`.""" + session = tf_session.Session( + target=master, config=config_pb2.ConfigProto(isolate_session_state=True)) + K.set_session(session) + K.get_session().run(tpu.initialize_system()) + K.manual_variable_initialization(True) + return session + + +@experimental +def shutdown_tpu_session(session=None): + """Shutdown the TPU attached to session. + + This should be called to cleanly shut down the TPU system before the client + exits. + + Args: + session: Session to shutdown, or None to use the default session. 
+ + Returns: + + """ + if session is None: + session = K.get_session() + + session.run(tpu.shutdown_system()) + + +class KerasTPUModel(models.Model): + """TPU compatible Keras model wrapper.""" + + def __init__(self, inputs, outputs, name=None): + super(models.Model, self).__init__( + inputs=inputs, + outputs=outputs, + name=name, + ) + self.predict_function = None + self.test_function = None + self.train_function = None + + def compile(self, + optimizer, + loss=None, + metrics=None, + loss_weights=None, + sample_weight_mode=None, + weighted_metrics=None, + target_tensors=None, + **kwargs): + if sample_weight_mode: + raise ValueError('sample_weight_mode not supported for TPU execution.') + if weighted_metrics: + raise ValueError('weighted_metrics not supported for TPU execution.') + if target_tensors: + raise ValueError('target_tensors is not supported for TPU execution.') + + super(KerasTPUModel, self).compile(optimizer, loss, metrics, loss_weights, + sample_weight_mode, weighted_metrics, + target_tensors, **kwargs) + + # Keras optimizers are not compatible with TPU rewrite + if not isinstance(self.optimizer, keras_optimizers.TFOptimizer): + raise ValueError( + 'Optimizer must be a TFOptimizer, got: %s' % self.optimizer) + + def train_on_batch(self, x, y, sample_weight=None, class_weight=None): + return super(KerasTPUModel, self).train_on_batch(x, y, sample_weight, + class_weight) + + def _make_train_function(self): + if not self.train_function: + self.train_function = TPUFunction(self, model_fn_lib.ModeKeys.TRAIN) + + return self.train_function + + def _make_test_function(self): + if not self.test_function: + self.test_function = TPUFunction(self, model_fn_lib.ModeKeys.EVAL) + return self.test_function + + def _make_predict_function(self): + if not self.predict_function: + self.predict_function = TPUFunction(self, model_fn_lib.ModeKeys.PREDICT) + return self.predict_function + + def cpu_model(self): + return models.Model( + inputs=self.inputs, + 
outputs=self.outputs, + name=self.name, + ) + + +@experimental +def tpu_model(model): + return KerasTPUModel( + inputs=model.inputs, outputs=model.outputs, name=model.name) -- GitLab From 7cf9b65492121961f98481fa06a0398698c6c0a3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 18:29:01 -0700 Subject: [PATCH 110/434] Automated g4 rollback of changelist 193605910 PiperOrigin-RevId: 193751624 --- tensorflow/core/grappler/optimizers/BUILD | 4 - .../grappler/optimizers/function_optimizer.cc | 126 +------ .../grappler/optimizers/function_optimizer.h | 6 +- .../optimizers/function_optimizer_test.cc | 32 +- .../grappler/optimizers/meta_optimizer.cc | 330 +++++++----------- .../core/grappler/optimizers/meta_optimizer.h | 33 +- .../optimizers/meta_optimizer_test.cc | 172 +-------- tensorflow/core/grappler/utils/functions.cc | 12 +- tensorflow/core/grappler/utils/functions.h | 40 +-- .../core/grappler/utils/functions_test.cc | 8 +- 10 files changed, 196 insertions(+), 567 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 42c3580d40..3f573cda10 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -517,13 +517,11 @@ cc_library( ":loop_optimizer", ":memory_optimizer", ":model_pruner", - "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler/utils:colocation", - "//tensorflow/core/grappler/utils:functions", "//tensorflow/core/grappler/utils:topological_sort", ], ) @@ -540,11 +538,9 @@ tf_cuda_cc_test( "//tensorflow/core:tensorflow", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core:testlib", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", - 
"//tensorflow/core/grappler/utils:grappler_test", ], ) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 950933b933..d008a9719f 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/graph_def_util.h" #include "tensorflow/core/framework/node_def.pb.h" -#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/op_def.pb.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/graph/graph_constructor.h" @@ -76,10 +75,12 @@ string UniqueSpecializedFunctionName(const FunctionDef& func, class FunctionOptimizerContext { public: - explicit FunctionOptimizerContext(RewriterConfig::Toggle opt_level, - const GrapplerItem& item) - : function_library_(OpRegistry::Global(), item.graph.library()) { - InitializeInlinedFunctions(opt_level, item); + explicit FunctionOptimizerContext(const GrapplerItem& item, + RewriterConfig::Toggle opt_level) + : opt_level_(opt_level), + function_library_(FunctionLibraryDefinition(OpRegistry::Global(), + item.graph.library())) { + InitializeInlinedFunctions(item); } const FunctionLibraryDefinition& function_library() const { @@ -100,9 +101,8 @@ class FunctionOptimizerContext { } private: - void InitializeInlinedFunctions(RewriterConfig::Toggle opt_level, - const GrapplerItem& item) { - bool aggressive = opt_level == RewriterConfig::AGGRESSIVE; + void InitializeInlinedFunctions(const GrapplerItem& item) { + bool aggressive = opt_level_ == RewriterConfig::AGGRESSIVE; for (const FunctionDef& func : item.graph.library().function()) { // Can't create IdentityN nodes with no input or output: skip these @@ -120,6 +120,7 @@ class FunctionOptimizerContext { } } + RewriterConfig::Toggle opt_level_; 
FunctionLibraryDefinition function_library_; // Functions that can be inlined into optimized graph. std::unordered_map inlined_functions_; @@ -127,93 +128,9 @@ class FunctionOptimizerContext { TF_DISALLOW_COPY_AND_ASSIGN(FunctionOptimizerContext); }; -// Return trimmed FunctionDefLibrary with functions that are reachable from -// the optimized graph. -FunctionDefLibrary TrimFunctionLibrary(const FunctionLibraryDefinition& flib, - const GraphDef& optimized_graph) { - // Functions that are reachable from the optimized graph. - std::unordered_set keep_funcs; - - std::vector func_queue; - func_queue.reserve(flib.num_functions()); - - // Add registered and not already processed functions to the queue by name. - const auto add_to_func_queue = [&](const string& func_name) { - const FunctionDef* func = flib.Find(func_name); - if (func && keep_funcs.find(func_name) == keep_funcs.end()) { - func_queue.push_back(func); - } - }; - - // Find all the functions that are reachable from the given node. - const auto add_node_to_func_queue = [&](const NodeDef& node) { - // Node itself can be a call to the function. - add_to_func_queue(node.op()); - - // Or node can have an attribute referencing a function. - for (const auto& attr : node.attr()) { - const auto& attr_value = attr.second; - - // 1. AttrValue.func - if (attr_value.has_func()) { - add_to_func_queue(attr_value.func().name()); - } - - // 2. AttrValue.ListValue.func - if (attr_value.has_list()) { - for (const auto& func : attr_value.list().func()) { - add_to_func_queue(func.name()); - } - } - } - }; - - // Add all functions that are directly called from the optimized graph. - const auto& graph_nodes = optimized_graph.node(); - std::for_each(graph_nodes.begin(), graph_nodes.end(), add_node_to_func_queue); - - // Process all reachable functions. 
- while (!func_queue.empty()) { - const FunctionDef* func = func_queue.back(); - func_queue.pop_back(); - - const string& func_name = func->signature().name(); - keep_funcs.insert(func_name); - - // Find all the functions that called from the function body. - const auto& func_body = func->node_def(); - std::for_each(func_body.begin(), func_body.end(), add_node_to_func_queue); - - // Check if the function has a registered gradient. - const string grad_func_name = flib.FindGradient(func_name); - if (!grad_func_name.empty()) add_to_func_queue(grad_func_name); - } - - FunctionDefLibrary lib; - for (const string& func_name : keep_funcs) { - const FunctionDef* func = CHECK_NOTNULL(flib.Find(func_name)); - *lib.add_function() = *func; - - const string grad_func_name = flib.FindGradient(func_name); - if (!grad_func_name.empty()) { - GradientDef* gd = lib.add_gradient(); - gd->set_function_name(func_name); - gd->set_gradient_func(grad_func_name); - } - } - - VLOG(3) << "Trimmed function library: " << keep_funcs.size() << " functions (" - << static_cast(keep_funcs.size() - flib.num_functions()) << ")"; - - return lib; -} - Status SpecializeFunction(const NodeDef& func_node, const FunctionDef& func, FunctionOptimizerContext* ctx, GraphDef* optimized_graph) { - VLOG(2) << "Specialize function instantiation: " - << SummarizeNodeDef(func_node); - const std::unordered_map func_attr( func_node.attr().begin(), func_node.attr().end()); @@ -224,20 +141,20 @@ Status SpecializeFunction(const NodeDef& func_node, const FunctionDef& func, TF_RETURN_IF_ERROR(MakeGrapplerFunctionItem(func, func_attr, flib, &item)); // TODO(ezhulenev): Push down const inputs and known input shapes. - FunctionDef specialized_func; - TF_RETURN_IF_ERROR(MakeFunctionDef(item, flib, &specialized_func)); + FunctionDef specialized; + TF_RETURN_IF_ERROR(MakeSpecializedFunctionDef(item, flib, &specialized)); // Find a name for specialized function. 
const string specialized_func_name = UniqueSpecializedFunctionName(func, func_node, flib); - specialized_func.mutable_signature()->set_name(specialized_func_name); - auto* specialized_attr = specialized_func.mutable_attr(); + specialized.mutable_signature()->set_name(specialized_func_name); + auto* specialized_attr = specialized.mutable_attr(); (*specialized_attr)[kGrapplerSpecializedFuncAttr].set_b(true); // Add specialized function to the library. TF_RETURN_IF_ERROR( - ctx->mutable_function_library().AddFunctionDef(specialized_func)); + ctx->mutable_function_library().AddFunctionDef(specialized)); // Add a function call node for the specialized function. NodeDef* specialized_func_node = optimized_graph->add_node(); @@ -309,8 +226,6 @@ Status HookInlinedFunctionOutputs( Status InlineFunction(const NodeDef& func_node, const FunctionDef& func, const FunctionOptimizerContext& ctx, GraphDef* optimized_graph) { - VLOG(2) << "Inline function instantiation: " << SummarizeNodeDef(func_node); - const std::unordered_map func_attr( func_node.attr().begin(), func_node.attr().end()); @@ -444,8 +359,6 @@ class SymbolicGradientEnv { Status InlineSymbolicGradient(const NodeDef& node, SymbolicGradientEnv* env, GraphDef* inlined_graph) { - VLOG(2) << "Inline symbolic gradient: " << SummarizeNodeDef(node); - GraphDef graph_def; // Create a node to anchor the gradient inputs @@ -541,16 +454,13 @@ Status InlineSymbolicGradient(const NodeDef& node, SymbolicGradientEnv* env, Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { - VLOG(2) << "Optimize function library: id=" << item.id; - // Nothing to do here. 
if (item.graph.library().function_size() == 0) { - VLOG(3) << "Skip Grappler item with empty function library"; *optimized_graph = item.graph; return Status::OK(); } - FunctionOptimizerContext ctx(opt_level_, item); + FunctionOptimizerContext ctx(item, opt_level_); SymbolicGradientEnv env(item.graph.versions().producer(), item.graph.library()); @@ -596,11 +506,9 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph->add_node() = node; } + // TODO(bsteiner): trim the library to remove unused function definitions *optimized_graph->mutable_versions() = item.graph.versions(); - *optimized_graph->mutable_library() = - options_.enable_trim_function_library - ? TrimFunctionLibrary(ctx.function_library(), *optimized_graph) - : ctx.function_library().ToProto(); + *optimized_graph->mutable_library() = ctx.function_library().ToProto(); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.h b/tensorflow/core/grappler/optimizers/function_optimizer.h index e307b4e533..c555fadf83 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.h +++ b/tensorflow/core/grappler/optimizers/function_optimizer.h @@ -26,9 +26,8 @@ namespace grappler { // operations to make the overall graph more efficient. 
class FunctionOptimizer : public GraphOptimizer { public: - explicit FunctionOptimizer(RewriterConfig::Toggle opt_level) - : opt_level_(opt_level) {} - ~FunctionOptimizer() override = default; + FunctionOptimizer(RewriterConfig::Toggle opt_level) : opt_level_(opt_level) {} + ~FunctionOptimizer() override {} string name() const override { return "function_optimizer"; }; @@ -45,7 +44,6 @@ class FunctionOptimizer : public GraphOptimizer { bool enable_function_inlining = true; bool enable_function_specialization = true; bool enable_symbolic_gradient_inlining = true; - bool enable_trim_function_library = true; }; RewriterConfig::Toggle opt_level_; diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index 6147e8a27c..fb006d4868 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -31,8 +31,20 @@ constexpr char kDevice[] = "/device:CPU:0"; class FunctionOptimizerTest : public GrapplerTest { protected: - void DisableFunctionSpecialization(FunctionOptimizer* optimizer) { + void DisableAll(FunctionOptimizer* optimizer) { + optimizer->options_.enable_function_inlining = false; optimizer->options_.enable_function_specialization = false; + optimizer->options_.enable_symbolic_gradient_inlining = false; + } + + void EnableOnlyFunctionInlining(FunctionOptimizer* optimizer) { + DisableAll(optimizer); + optimizer->options_.enable_function_inlining = true; + } + + void EnableOnlyFunctionSpecialization(FunctionOptimizer* optimizer) { + DisableAll(optimizer); + optimizer->options_.enable_function_specialization = true; } }; @@ -340,7 +352,7 @@ TEST_F(FunctionOptimizerTest, InlineFunction_FunctionWithoutInput) { using test::function::NDef; FunctionOptimizer optimizer(RewriterConfig::DEFAULT); - DisableFunctionSpecialization(&optimizer); // do not specialize noinline func + 
EnableOnlyFunctionInlining(&optimizer); const Tensor kTwo = test::AsScalar(2); FunctionDef func = FunctionDefHelper::Define( @@ -614,13 +626,14 @@ TEST_F(FunctionOptimizerTest, SpecializeFunction_XTimesTwo) { using test::function::NDef; FunctionOptimizer optimizer(RewriterConfig::DEFAULT); + EnableOnlyFunctionSpecialization(&optimizer); - // Mark XTimesTwo as noinline. + // Mark XTimesTwo as noinline FunctionDef x_times_two = test::function::XTimesTwo(); (*x_times_two.mutable_attr())["_noinline"].set_b(true); std::vector function_library = {x_times_two}; - // Build a graph to compute y = XTimesTwo(x). + // Build a graph to compute y = XTimesTwo(x) GrapplerItem item; item.graph = test::function::GDef( {NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice), @@ -631,13 +644,12 @@ TEST_F(FunctionOptimizerTest, SpecializeFunction_XTimesTwo) { GraphDef output; TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output)); - // Make sure that specialized function was added to the library and original - // function was removed. - EXPECT_EQ(1, output.library().function_size()); + // Make sure that specialized function was added to the library + EXPECT_EQ(2, output.library().function_size()); EXPECT_EQ("XTimesTwo_specialized_for_y", - output.library().function(0).signature().name()); + output.library().function(1).signature().name()); - // And 'y' node is calling specialized function. + // And 'y' node is calling specialized function int count = 0; for (const NodeDef& node : output.node()) { if (node.name() == "y" && count++) { @@ -646,7 +658,7 @@ TEST_F(FunctionOptimizerTest, SpecializeFunction_XTimesTwo) { } EXPECT_EQ(1, count); - // And that graph evaluation yields the same result. 
+ // And that graph evaluation yields the same result Tensor pi = test::AsScalar(3.14f); item.fetch = {"z"}; item.feed.emplace_back("x", pi); diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index cdc4698c34..558b8a77e8 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -14,7 +14,6 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/optimizers/meta_optimizer.h" -#include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/grappler/optimizers/arithmetic_optimizer.h" @@ -30,7 +29,6 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/memory_optimizer.h" #include "tensorflow/core/grappler/optimizers/model_pruner.h" #include "tensorflow/core/grappler/utils/colocation.h" -#include "tensorflow/core/grappler/utils/functions.h" #include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/lib/core/status.h" @@ -38,9 +36,6 @@ namespace tensorflow { namespace grappler { namespace { - -constexpr int kDefaultNumberOfIterations = 1; - int64 NumEdges(const GraphDef& graph) { int64 num_edges = 0; for (const auto& node : graph.node()) { @@ -55,138 +50,144 @@ string PrintSizesBeforeAfter(const GraphDef& before, const GraphDef& after) { NumEdges(after), " edges (", NumEdges(after) - NumEdges(before), ")"); } - -int NumIterations(const RewriterConfig& cfg) { - return cfg.meta_optimizer_iterations() == RewriterConfig::DEFAULT_NUM_ITERS - ? kDefaultNumberOfIterations - : cfg.meta_optimizer_iterations(); -} - -// Check if optimizer is allowed to run only once. 
-int IsRunOnceOptimizer(const string& name) { return name == "layout"; } - } // namespace -std::unique_ptr MetaOptimizer::MakeNewOptimizer( - const string& optimizer) const { -#define MK_OPT(NAME, VALUE) \ - if (optimizer == NAME) return std::unique_ptr(VALUE) - - MK_OPT("pruning", new ModelPruner()); - MK_OPT("function", new FunctionOptimizer(cfg_.function_optimization())); - MK_OPT("constfold", new ConstantFolding(cpu_device_)); - MK_OPT("layout", new LayoutOptimizer()); - MK_OPT("memory", new MemoryOptimizer(RewriterConfig::MANUAL)); - MK_OPT("arithmetic", new ArithmeticOptimizer(cfg_.arithmetic_optimization())); - MK_OPT("autoparallel", new AutoParallel(cfg_.auto_parallel().num_replicas())); - MK_OPT("loop", new LoopOptimizer(cfg_.loop_optimization())); - MK_OPT("dependency", new DependencyOptimizer(cfg_.dependency_optimization())); - MK_OPT("debug_stripper", new DebugStripper()); - - return std::unique_ptr(); -#undef MK_OPT -} - -Status MetaOptimizer::InitializeOptimizers( - std::vector>* optimizers) const { - if (!cfg_.disable_model_pruning()) { - optimizers->emplace_back(new ModelPruner()); +std::unique_ptr MetaOptimizer::NewOptimizer( + const string& optimizer) { + std::unique_ptr graph_optimizer; + if (optimizer == "pruning") { + graph_optimizer.reset(new ModelPruner()); } - if (cfg_.function_optimization() != RewriterConfig::OFF) { - optimizers->emplace_back( - new FunctionOptimizer(cfg_.function_optimization())); + if (optimizer == "function") { + graph_optimizer.reset(new FunctionOptimizer(cfg_.function_optimization())); } - if (cfg_.debug_stripper() == RewriterConfig::ON) { - optimizers->emplace_back(new DebugStripper()); + if (optimizer == "constfold") { + graph_optimizer.reset(new ConstantFolding(cpu_device_)); } - if (cfg_.constant_folding() != RewriterConfig::OFF) { - optimizers->emplace_back( - new ConstantFolding(cfg_.constant_folding(), cpu_device_)); + if (optimizer == "layout") { + graph_optimizer.reset(new LayoutOptimizer()); } - if 
(cfg_.arithmetic_optimization() != RewriterConfig::OFF) { - optimizers->emplace_back( - new ArithmeticOptimizer(cfg_.arithmetic_optimization())); + if (optimizer == "memory") { + graph_optimizer.reset(new MemoryOptimizer(RewriterConfig::MANUAL)); } - if (cfg_.loop_optimization() != RewriterConfig::OFF) { - optimizers->emplace_back(new LoopOptimizer(cfg_.loop_optimization())); + if (optimizer == "arithmetic") { + graph_optimizer.reset( + new ArithmeticOptimizer(cfg_.arithmetic_optimization())); } - if (cfg_.dependency_optimization() != RewriterConfig::OFF) { - optimizers->emplace_back( - new DependencyOptimizer(cfg_.dependency_optimization())); + if (optimizer == "autoparallel") { + graph_optimizer.reset( + new AutoParallel(cfg_.auto_parallel().num_replicas())); } - if (cfg_.layout_optimizer() != RewriterConfig::OFF) { - optimizers->emplace_back(new LayoutOptimizer()); + if (optimizer == "loop") { + graph_optimizer.reset(new LoopOptimizer(cfg_.loop_optimization())); } - if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { - if (cfg_.memory_optimizer_target_node_name_scope().empty()) { - optimizers->emplace_back( - // Use the default target node name prefix "gradients/" - new MemoryOptimizer(cfg_.memory_optimization())); - } else { - optimizers->emplace_back( - new MemoryOptimizer(cfg_.memory_optimization(), - cfg_.memory_optimizer_target_node_name_scope())); - } + if (optimizer == "dependency") { + graph_optimizer.reset( + new DependencyOptimizer(cfg_.dependency_optimization())); } - if (cfg_.auto_parallel().enable()) { - optimizers->emplace_back( - new AutoParallel(cfg_.auto_parallel().num_replicas())); + if (optimizer == "debug_stripper") { + graph_optimizer.reset(new DebugStripper()); } - return Status::OK(); + return graph_optimizer; } -Status MetaOptimizer::InitializeOptimizersByName( - std::vector>* optimizers) const { - for (const string& optimizer_name : cfg_.optimizers()) { - auto optimizer = MakeNewOptimizer(optimizer_name); - if (optimizer) { - 
VLOG(2) << "Registered default graph optimizer: " << optimizer_name; - optimizers->push_back(std::move(optimizer)); - continue; +Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { + std::vector> optimizers; + if (cfg_.optimizers().empty()) { + if (!cfg_.disable_model_pruning()) { + optimizers.push_back(std::unique_ptr(new ModelPruner())); } - - auto custom_optimizer = - CustomGraphOptimizerRegistry::CreateByNameOrNull(optimizer_name); - - if (custom_optimizer) { - VLOG(2) << "Registered custom graph optimizer: " << optimizer_name; - TF_RETURN_IF_ERROR(custom_optimizer->Init()); - optimizers->push_back(std::move(custom_optimizer)); - } else { - VLOG(2) << "Can't register an optimizer by name: " << optimizer_name; + if (cfg_.function_optimization() != RewriterConfig::OFF) { + optimizers.push_back(std::unique_ptr( + new FunctionOptimizer(cfg_.function_optimization()))); + } + if (cfg_.debug_stripper() == RewriterConfig::ON) { + optimizers.push_back( + std::unique_ptr(new DebugStripper())); + } + if (cfg_.constant_folding() != RewriterConfig::OFF) { + optimizers.push_back(std::unique_ptr( + new ConstantFolding(cfg_.constant_folding(), cpu_device_))); + } + if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) { + optimizers.push_back(std::unique_ptr( + new ArithmeticOptimizer(cfg_.arithmetic_optimization()))); + } + if (cfg_.loop_optimization() != RewriterConfig::OFF) { + optimizers.push_back(std::unique_ptr( + new LoopOptimizer(cfg_.loop_optimization()))); + } + if (cfg_.dependency_optimization() != RewriterConfig::OFF) { + optimizers.push_back(std::unique_ptr( + new DependencyOptimizer(cfg_.dependency_optimization()))); + } + if (cfg_.layout_optimizer() != RewriterConfig::OFF) { + optimizers.push_back( + std::unique_ptr(new LayoutOptimizer())); + } + if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { + if (cfg_.memory_optimizer_target_node_name_scope().empty()) { + 
optimizers.push_back(std::unique_ptr( + // Use the default target node name prefix "gradients/" + new MemoryOptimizer(cfg_.memory_optimization()))); + } else { + optimizers.push_back( + std::unique_ptr(new MemoryOptimizer( + cfg_.memory_optimization(), + cfg_.memory_optimizer_target_node_name_scope()))); + } + } + if (cfg_.auto_parallel().enable()) { + optimizers.push_back(std::unique_ptr( + new AutoParallel(cfg_.auto_parallel().num_replicas()))); + } + } else { + const std::set available_optimizers = { + "pruning", "function", "constfold", "layout", + "memory", "autoparallel", "arithmetic", "loop", + "dependency", "debug_stripper"}; + std::vector custom_optimizer_names; + for (const auto& optimizer_name : cfg_.optimizers()) { + if (available_optimizers.find(optimizer_name) != + available_optimizers.end()) { + optimizers.push_back(NewOptimizer(optimizer_name)); + } else { + custom_optimizer_names.push_back(optimizer_name); + } + } + // Now run the custom optimizers. + for (const auto& optimizer_name : custom_optimizer_names) { + std::unique_ptr opt = + CustomGraphOptimizerRegistry::CreateByNameOrNull(optimizer_name); + if (opt == nullptr) continue; + TF_RETURN_IF_ERROR(opt->Init()); + optimizers.push_back(std::move(opt)); } } - return Status::OK(); -} - -Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, - GraphDef* optimized_graph) { - VLOG(2) << "Optimize GrapplerItem: item.id=" << item.id; - - std::vector> optimizers; - bool register_by_name = !cfg_.optimizers().empty(); - TF_RETURN_IF_ERROR(register_by_name ? InitializeOptimizersByName(&optimizers) - : InitializeOptimizers(&optimizers)); if (optimizers.empty()) { *optimized_graph = item.graph; return Status::OK(); } - // Invariant: optimized_graph contains the most recently optimized version of - // the graph. + // Some optimizers should be run only once. 
+ const std::set run_once_optimizers = {"layout"}; + bool already_optimized = false; + const int num_iterations = + cfg_.meta_optimizer_iterations() == RewriterConfig::DEFAULT_NUM_ITERS + ? 1 + : cfg_.meta_optimizer_iterations(); GrapplerItem optimized_item = item; optimized_graph->Swap(&optimized_item.graph); - - GraphOptimizationResult optimization_result(item.id); - - for (int iteration = 0; iteration < NumIterations(cfg_); ++iteration) { - VLOG(4) << "Starting optimization iteration " << iteration + 1; - + for (int iteration = 0; iteration < num_iterations; ++iteration) { + VLOG(1) << "Starting optimization iteration " << iteration + 1; for (const auto& optimizer : optimizers) { - // Some optimizers can run only once. - if (iteration > 0 && IsRunOnceOptimizer(optimizer->name())) continue; - + // Invariant: optimized_graph contains the most recently optimized + // version of the graph. + if (iteration > 0 && run_once_optimizers.count(optimizer->name())) { + continue; + } uint64 start_us = Env::Default()->NowMicros(); // This swaps the current optimized_graph into optimized item and // resets optimized_graph to an empty graph. 
@@ -194,118 +195,45 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, *optimized_graph = GraphDef(); Status status = optimizer->Optimize(cluster, optimized_item, optimized_graph); - uint64 end_us = Env::Default()->NowMicros(); + uint64 end_us = Env::Default()->NowMicros(); + float duration_ms = (end_us - start_us) / 1000.0f; string result; if (!status.ok()) { + VLOG(1) << "Not able to apply optimizer " << optimizer->name() << ": " + << status.ToString(); optimized_graph->Swap(&optimized_item.graph); result = status.ToString(); } else { - optimization_result.is_optimized = true; - float duration_ms = (end_us - start_us) / 1000.0f; + already_optimized = true; result = strings::StrCat( + optimizer->name(), ": ", PrintSizesBeforeAfter(optimized_item.graph, *optimized_graph), ", time = ", duration_ms, "ms."); } - VLOG(4) << optimizer->name() << ": " << result; - - OptimizerResult optimizer_result{optimizer->name(), result}; - optimization_result.results.push_back(optimizer_result); + result_.emplace_back(optimizer->name(), result); + VLOG(1) << result; } } - // Record graph optimization result. - optimization_results_.push_back(optimization_result); - - if (optimization_result.is_optimized) { + if (already_optimized) { TF_RETURN_IF_ERROR(TopologicalSort(optimized_graph)); ReassignColocation(optimized_graph); - // Make sure that the optimizers preserved the graph version. + // Make sure that the optimizers preserved the graph version and library. + DCHECK_GE(optimized_graph->library().function_size(), + item.graph.library().function_size()); + DCHECK_GE(optimized_graph->library().gradient_size(), + item.graph.library().gradient_size()); DCHECK_EQ(optimized_graph->versions().producer(), item.graph.versions().producer()); } - - return Status::OK(); -} - -Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, - GraphDef* optimized_graph) { - optimization_results_.clear(); - - // 1. 
Optimize main graph - TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph)); - - // 2. Optimize function library - FunctionLibraryDefinition flib(OpRegistry::Global(), - optimized_graph->library()); - - // Optimize each function only once. - std::unordered_set optimized_funcs; - bool optimize_function_library = true; - - // TODO(ezhulenev): turn it on after fixing ranklab: tune_tf_test. - cfg_.set_constant_folding(RewriterConfig::OFF); - cfg_.set_arithmetic_optimization(RewriterConfig::OFF); - - while (optimize_function_library) { - optimize_function_library = false; - - for (const FunctionDef& func : optimized_graph->library().function()) { - const string& func_name = func.signature().name(); - - // Skip already optimized functions. - if (optimized_funcs.find(func_name) != optimized_funcs.end()) continue; - - // Skip parametrized functions (function type or body is defined only at - // function call time by caller node attributes). - if (IsParametrized(func)) continue; - - VLOG(3) << "Optimize function: function=" << func_name; - - // Function optimization might specialize nested function calls, so we - // have to reset the flag and do at least one more pass over the library. - optimize_function_library = true; - optimized_funcs.insert(func_name); - - // Make a GrapplerItem from a FunctionDef. - GrapplerFunctionItem func_item; - TF_RETURN_IF_ERROR(MakeGrapplerFunctionItem(func, flib, &func_item)); - - // Optimize function body graph. - GraphDef optimized_func_graph; - TF_RETURN_IF_ERROR( - OptimizeGraph(cluster, func_item, &optimized_func_graph)); - - // Function body optimization might have created new specialized - // functions, add them to the library. - TF_RETURN_IF_ERROR(flib.AddLibrary(optimized_func_graph.library())); - - // Convert optimized graph back to FunctionDef. 
- FunctionDef optimized_func; - func_item.SwapFunctionBody(std::move(optimized_func_graph)); - TF_RETURN_IF_ERROR(MakeFunctionDef(func_item, flib, &optimized_func)); - - // Replace optimized function with a new FunctionDef. - TF_RETURN_IF_ERROR(flib.RemoveFunction(func_name)); - TF_RETURN_IF_ERROR(flib.AddFunctionDef(optimized_func)); - } - - // If optimized at least one function, update the graph library. - if (optimize_function_library) { - *optimized_graph->mutable_library() = flib.ToProto(); - } - } - return Status::OK(); } void MetaOptimizer::PrintResult() { - for (const GraphOptimizationResult& graph_result : optimization_results_) { - LOG(INFO) << "Optimization results for grappler item: " << graph_result.id; - for (const OptimizerResult& result : graph_result.results) { - LOG(INFO) << "Return status of optimizer " << result.optimizer_name - << ": " << result.result; - } + for (const auto& result : result_) { + LOG(INFO) << "Return status of optimizer " << result.first << ": " + << result.second; } } diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index 7cf9a40c2d..382cfe51d4 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -30,7 +30,7 @@ class MetaOptimizer : public GraphOptimizer { public: MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg) : cpu_device_(cpu_device), cfg_(cfg) {} - ~MetaOptimizer() override = default; + ~MetaOptimizer() override {} string name() const override { return "meta_optimizer"; }; @@ -43,37 +43,10 @@ class MetaOptimizer : public GraphOptimizer { const GraphDef& optimized_graph, double result) override; private: - std::unique_ptr MakeNewOptimizer( - const string& optimizer) const; - - // Initialize active optimizers from RewriterConfig toggles. 
- Status InitializeOptimizers( - std::vector>* optimizers) const; - // Initialize active optimizers from RewriterConfig optimizer names. - Status InitializeOptimizersByName( - std::vector>* optimizers) const; - - // Run optimization pass over a single GrapplerItem. Meta optimizer might run - // multiple such passes: 1) for the main graph 2) for the function library - Status OptimizeGraph(Cluster* cluster, const GrapplerItem& item, - GraphDef* optimized_graph); - + std::unique_ptr NewOptimizer(const string& optimizer); DeviceBase* const cpu_device_; // may be NULL RewriterConfig cfg_; - - struct OptimizerResult { - string optimizer_name; - string result; - }; - - struct GraphOptimizationResult { - explicit GraphOptimizationResult(const string& id) : id(id) {} - string id; - bool is_optimized = false; - std::vector results; - }; - - std::vector optimization_results_; + std::vector> result_; }; bool MetaOptimizerEnabled(const RewriterConfig& cfg); diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc index 8793ad9633..d9a386b9be 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc @@ -16,14 +16,11 @@ limitations under the License. 
#include "tensorflow/core/grappler/optimizers/meta_optimizer.h" #include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/core/framework/function_testlib.h" -#include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" #include "tensorflow/core/grappler/utils.h" -#include "tensorflow/core/grappler/utils/grappler_test.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" @@ -31,8 +28,6 @@ namespace tensorflow { namespace grappler { namespace { -constexpr char kDevice[] = "/device:CPU:0"; - class TestOptimizer : public CustomGraphOptimizer { public: static void SetOptimized(const bool flag_value) { optimized_ = flag_value; } @@ -61,9 +56,7 @@ bool TestOptimizer::optimized_; REGISTER_GRAPH_OPTIMIZER(TestOptimizer); -class MetaOptimizerTest : public GrapplerTest {}; - -TEST_F(MetaOptimizerTest, RunsCustomOptimizer) { +TEST(MetaOptimizerTest, RunsCustomOptimizer) { TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); GrapplerItem item; CHECK(fake_input.NextItem(&item)); @@ -79,7 +72,7 @@ TEST_F(MetaOptimizerTest, RunsCustomOptimizer) { EXPECT_TRUE(TestOptimizer::IsOptimized()); } -TEST_F(MetaOptimizerTest, RunOptimizersTwice) { +TEST(MetaOptimizerTest, RunOptimizersTwice) { TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); GrapplerItem item; CHECK(fake_input.NextItem(&item)); @@ -93,167 +86,6 @@ TEST_F(MetaOptimizerTest, RunOptimizersTwice) { TF_EXPECT_OK(status); } -TEST_F(MetaOptimizerTest, OptimizeFunctionLibrary) { - using test::function::NDef; - - // Enable ony function optimization. 
- RewriterConfig rewriter_config; - rewriter_config.set_meta_optimizer_iterations(RewriterConfig::TWO); - rewriter_config.set_function_optimization(RewriterConfig::ON); - rewriter_config.add_optimizers("function"); - - MetaOptimizer optimizer(nullptr, rewriter_config); - - // Define function library: - // - // MyMul(x, y) = x * y - // *MySquare(x) = MyMul(x, x) - // *MyQuadratic(x) = MySquare(MySquare(x)) - // - // * - marked as noinline - - FunctionDef mul_func = FunctionDefHelper::Create( - "MyMul", {"x:T", "y:T"}, {"z:T"}, {"T: {float, double}"}, - {{{"mul"}, "Mul", {"x", "y"}, {{"T", "$T"}}}}, - /* Mapping between function returns and function node outputs. */ - {{"z", "mul:z:0"}}); - - FunctionDef square_func = FunctionDefHelper::Create( - "MySquare", {"x:T"}, {"z:T"}, {"T: {float, double}"}, - {{{"my_mul"}, "MyMul", {"x", "x"}, {{"T", "$T"}}}}, - /* Mapping between function returns and function node outputs. */ - {{"z", "my_mul:z:0"}}); - (*square_func.mutable_attr())["_noinline"].set_b(true); - - FunctionDef quadratic_func = FunctionDefHelper::Create( - "MyQuadratic", {"x:T"}, {"z:T"}, {"T: {float, double}"}, - {{{"square"}, "MySquare", {"x"}, {{"T", "$T"}}}, - {{"quadratic"}, "MySquare", {"square:z"}, {{"T", "$T"}}}}, - /* Mapping between function returns and function node outputs. 
*/ - {{"z", "quadratic:z:0"}}); - (*quadratic_func.mutable_attr())["_noinline"].set_b(true); - - // Tensorflow graph: - // - // a = tf.Placeholder(tf.float); - // b = tf.Placeholder(tf.int32); - // - // square = MySquare(a); // a^2 - // quadratic = MyQuadratic(b); // b^4 - GrapplerItem item; - item.graph = test::function::GDef( - {NDef("a", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice), - NDef("b", "Placeholder", {}, {{"dtype", DT_INT32}}, kDevice), - // Calls into function library - NDef("square", "MySquare", {"a"}, {{"T", DT_FLOAT}}, kDevice), - NDef("quadratic", "MyQuadratic", {"b"}, {{"T", DT_INT32}}, kDevice), - // Forward outputs - NDef("out_s", "Identity", {"square:0"}, {{"T", DT_FLOAT}}, kDevice), - NDef("out_q", "Identity", {"quadratic:0"}, {{"T", DT_INT32}}, kDevice)}, - // FunctionLib - {mul_func, square_func, quadratic_func}); - - GraphDef output; - TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output)); - - FunctionLibraryDefinition optimized_flib(OpRegistry::Global(), - output.library()); - - // Specialized and optimized functions should be added to the graph. - EXPECT_EQ(6, optimized_flib.num_functions()); - - // MyQuadratic should be specialized once: - // 0. 'quadratic' node in the main graph - const string optimized_0 = "MyQuadratic_specialized_for_quadratic"; - - // MySquare should be specialized and optimized for 3 instantiations: - // 1. 'square' node in the main graph - // 2. 'square' node in the MyQuadratic specialization - // 3. 
'quadratic' node in the MyQuadratic specialization - - const string optimized_1 = "MySquare_specialized_for_square"; - const string optimized_2 = "MySquare_specialized_for_square_1"; - const string optimized_3 = "MySquare_specialized_for_quadratic"; - - const FunctionDef* optimized_func_0 = optimized_flib.Find(optimized_0); - const FunctionDef* optimized_func_1 = optimized_flib.Find(optimized_1); - const FunctionDef* optimized_func_2 = optimized_flib.Find(optimized_2); - const FunctionDef* optimized_func_3 = optimized_flib.Find(optimized_3); - - ASSERT_NE(optimized_func_0, nullptr); - ASSERT_NE(optimized_func_1, nullptr); - ASSERT_NE(optimized_func_2, nullptr); - ASSERT_NE(optimized_func_3, nullptr); - - // Graph should call optimized function. - int count = 0; - for (const NodeDef& node : output.node()) { - if (node.name() == "square" && count++) { - EXPECT_EQ("MySquare_specialized_for_square", node.op()); - } else if (node.name() == "quadratic" && count++) { - EXPECT_EQ("MyQuadratic_specialized_for_quadratic", node.op()); - } - } - EXPECT_EQ(2, count); - - // Specialized MySquare should call specialized functions. - count = 0; - for (const NodeDef& node : optimized_func_0->node_def()) { - if (node.name() == "square" && count++) { - EXPECT_EQ(optimized_2, node.op()); - } else if (node.name() == "quadratic" && count++) { - EXPECT_EQ(optimized_3, node.op()); - } - } - EXPECT_EQ(2, count); - - const std::vector optimized_funcs = { - optimized_func_1, optimized_func_1, optimized_func_3}; - - // MyMul should be inlined into all optimized versions of MySquare. 
- for (const FunctionDef* optimized_func : optimized_funcs) { - count = 0; - for (const NodeDef& node : optimized_func->node_def()) { - if (node.name() == "my_mul/inlined_inputs" && count++) { - EXPECT_EQ("IdentityN", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("x:0", node.input(0)); - EXPECT_EQ("x:0", node.input(1)); - } else if (node.name() == "my_mul/x" && count++) { - EXPECT_EQ("Identity", node.op()); - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("my_mul/inlined_inputs:output:0", node.input(0)); - } else if (node.name() == "my_mul/y" && count++) { - EXPECT_EQ("Identity", node.op()); - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("my_mul/inlined_inputs:output:1", node.input(0)); - } else if (node.name() == "my_mul/mul" && count++) { - EXPECT_EQ("Mul", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("my_mul/x:output:0", node.input(0)); - EXPECT_EQ("my_mul/y:output:0", node.input(1)); - } else if (node.name() == "my_mul" && count++) { - EXPECT_EQ("IdentityN", node.op()); - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("my_mul/mul:z:0", node.input(0)); - } - EXPECT_TRUE(node.device().empty()); - } - EXPECT_EQ(5, count); - } - - item.fetch = {"out_s", "out_q"}; - item.feed.emplace_back("a", test::AsScalar(2.0f)); - item.feed.emplace_back("b", test::AsScalar(4)); - auto tensors_expected = EvaluateFetchNodes(item); - - GrapplerItem optimized(item, std::move(output)); - auto tensors = EvaluateFetchNodes(optimized); - - test::ExpectTensorEqual(tensors_expected[0], tensors[0]); - test::ExpectTensorEqual(tensors_expected[1], tensors[1]); -} - } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc index 790809bc67..638fe1999a 100644 --- a/tensorflow/core/grappler/utils/functions.cc +++ b/tensorflow/core/grappler/utils/functions.cc @@ -545,12 +545,6 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func, return Status::OK(); } 
-Status MakeGrapplerFunctionItem(const FunctionDef& func, - const FunctionLibraryDefinition& flib, - GrapplerFunctionItem* item) { - return MakeGrapplerFunctionItem(func, AttrValueMap(), flib, item); -} - // Register GrapplerFunctionItem input arg expansion and function body outputs // in the GrapplerFunctionConnectivity. Status RegisterGrapplerFunctionConnectivity( @@ -566,9 +560,9 @@ Status RegisterGrapplerFunctionConnectivity( return Status::OK(); } -Status MakeFunctionDef(const GrapplerFunctionItem& item, - const FunctionLibraryDefinition& flib, - FunctionDef* func) { +Status MakeSpecializedFunctionDef(const GrapplerFunctionItem& item, + const FunctionLibraryDefinition& flib, + FunctionDef* func) { func->mutable_signature()->set_name(item.id); func->mutable_signature()->set_is_stateful(item.is_stateful()); diff --git a/tensorflow/core/grappler/utils/functions.h b/tensorflow/core/grappler/utils/functions.h index 5e8b6c6960..ab369bcad7 100644 --- a/tensorflow/core/grappler/utils/functions.h +++ b/tensorflow/core/grappler/utils/functions.h @@ -38,8 +38,7 @@ using AttrValueMap = std::unordered_map; // function body in place of function inputs and a resolved input data type. struct InputArgExpansion { // TODO(ezhulenev): Add support for functions with tensor sequence inputs of - // different data types. - // TODO(ezhulenev): Support type parametrized inputs? + // different data types string input_name; // name of the function input argument DataType data_type; // input data type bool is_ref; // if true, inputs are required to be refs @@ -54,8 +53,7 @@ struct InputArgExpansion { // tensors of a function body nodes and a resolved output data type struct OutputArgExpansion { // TODO(ezhulenev): Add support for functions with tensor sequence outputs of - // different data types. - // TODO(ezhulenev): Support type parametrized outputs? 
+ // different data types string output_name; // name of the function output argument DataType data_type; // output data type bool is_ref; // if true, outputs are refs @@ -188,6 +186,13 @@ bool HasParametrizedBody(const FunctionDef& func); // Check if function has parametrized type or body. bool IsParametrized(const FunctionDef& func); +// Make a GrapplerFunctionItem from the function definition and attributes. +// Return error if the given function def cannot be converted. +Status MakeGrapplerFunctionItem( + const FunctionDef& func, + const std::unordered_map& func_instantiation_attr, + const FunctionLibraryDefinition& flib, GrapplerFunctionItem* item); + // Register GrapplerFunctionItem input arg expansion and function body outputs // in the GrapplerFunctionConnectivity. Use function library definition to // lookup function body nodes output names and ranges. @@ -195,28 +200,11 @@ Status RegisterGrapplerFunctionConnectivity( const GrapplerFunctionItem& item, const FunctionLibraryDefinition& flib, GrapplerFunctionConnectivity* connectivity); -// Make a GrapplerFunctionItem from the function definition and function -// instantiation attributes (caller node attributes). Returns error if the given -// function def cannot be converted (e.g. not all attributes are defined). -Status MakeGrapplerFunctionItem( - const FunctionDef& func, - const std::unordered_map& func_instantiation_attr, - const FunctionLibraryDefinition& flib, GrapplerFunctionItem* item); - -// Make a GrapplerFunction item from the function definition. Function must be -// fully defined (no type or body parametrization). -// TODO(ezhulenev): Support parametrized functions without fully defined -// instantiation attributes? Do we ever want to optimize parametrized function -// without specializing it to it's instantiation attributes (at least types)? 
-Status MakeGrapplerFunctionItem(const FunctionDef& func, - const FunctionLibraryDefinition& flib, - GrapplerFunctionItem* item); - -// Make a FunctionDef from the GrapplerFunctionItem. Use function library -// definition to lookup function body nodes output names and ranges. -Status MakeFunctionDef(const GrapplerFunctionItem& item, - const FunctionLibraryDefinition& flib, - FunctionDef* func); +// Make a specialized FunctionDef from the GrapplerFunctionItem. Use function +// library definition to lookup function body nodes output names and ranges. +Status MakeSpecializedFunctionDef(const GrapplerFunctionItem& item, + const FunctionLibraryDefinition& flib, + FunctionDef* func); } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc index 6dfd49b943..54d235a8a4 100644 --- a/tensorflow/core/grappler/utils/functions_test.cc +++ b/tensorflow/core/grappler/utils/functions_test.cc @@ -524,7 +524,7 @@ TEST_F(FunctionsTest, FromFunctionDefWithoutInput) { EXPECT_EQ("two", cast.input(0)); } -TEST_F(FunctionsTest, MakeFunctionDef) { +TEST_F(FunctionsTest, MakeSpecializedFunctionDef) { const Tensor kTwo = test::AsScalar(2); FunctionDef func = FunctionDefHelper::Define( // Name @@ -550,7 +550,7 @@ TEST_F(FunctionsTest, MakeFunctionDef) { TF_EXPECT_OK(MakeGrapplerFunctionItem(func, func_attr, flib, &item)); FunctionDef specialized; - TF_EXPECT_OK(MakeFunctionDef(item, flib, &specialized)); + TF_EXPECT_OK(MakeSpecializedFunctionDef(item, flib, &specialized)); // Input and output types are resolved based on instantiation attributes. 
EXPECT_EQ("x", specialized.signature().input_arg(0).name()); @@ -573,7 +573,7 @@ TEST_F(FunctionsTest, MakeFunctionDef) { EXPECT_EQ(2, count); } -TEST_F(FunctionsTest, SwapFunctionBodyAndMakeFunctionDef) { +TEST_F(FunctionsTest, SwapFunctionBodyAndMakeSpecializedFunctionDef) { using test::function::NDef; FunctionDef mul_func = FunctionDefHelper::Create( @@ -606,7 +606,7 @@ TEST_F(FunctionsTest, SwapFunctionBodyAndMakeFunctionDef) { // Replace function body with identity function item.SwapFunctionBody(std::move(id_func_body)); FunctionDef specialized; - TF_EXPECT_OK(MakeFunctionDef(item, flib, &specialized)); + TF_EXPECT_OK(MakeSpecializedFunctionDef(item, flib, &specialized)); // Check that graph body was updated. int count = 0; -- GitLab From 82679654af098df1de27bcdcf6fc6942ccf4f236 Mon Sep 17 00:00:00 2001 From: ADiegoCAlonso Date: Sat, 21 Apr 2018 11:43:51 +0200 Subject: [PATCH 111/434] Add __init__py --- tensorflow/examples/tutorials/estimators/__init__.py | 0 tensorflow/examples/tutorials/input_fn/__init__.py | 0 tensorflow/examples/tutorials/layers/__init__.py | 0 tensorflow/examples/tutorials/monitors/__init__.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tensorflow/examples/tutorials/estimators/__init__.py create mode 100644 tensorflow/examples/tutorials/input_fn/__init__.py create mode 100644 tensorflow/examples/tutorials/layers/__init__.py create mode 100644 tensorflow/examples/tutorials/monitors/__init__.py diff --git a/tensorflow/examples/tutorials/estimators/__init__.py b/tensorflow/examples/tutorials/estimators/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tensorflow/examples/tutorials/input_fn/__init__.py b/tensorflow/examples/tutorials/input_fn/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tensorflow/examples/tutorials/layers/__init__.py b/tensorflow/examples/tutorials/layers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/tensorflow/examples/tutorials/monitors/__init__.py b/tensorflow/examples/tutorials/monitors/__init__.py new file mode 100644 index 0000000000..e69de29bb2 -- GitLab From aed22c552905d74de04c98b34aabedd12926790a Mon Sep 17 00:00:00 2001 From: ADiegoCAlonso Date: Sat, 21 Apr 2018 11:56:10 +0200 Subject: [PATCH 112/434] Specify float32 as float type instead of float64 --- tensorflow/examples/tutorials/monitors/iris_monitors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/examples/tutorials/monitors/iris_monitors.py b/tensorflow/examples/tutorials/monitors/iris_monitors.py index 850d105f7b..a2b7fe6023 100644 --- a/tensorflow/examples/tutorials/monitors/iris_monitors.py +++ b/tensorflow/examples/tutorials/monitors/iris_monitors.py @@ -32,9 +32,9 @@ IRIS_TEST = os.path.join(os.path.dirname(__file__), "iris_test.csv") def main(unused_argv): # Load datasets. training_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float) + filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32) test_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float) + filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32) validation_metrics = { "accuracy": @@ -83,7 +83,7 @@ def main(unused_argv): # Classify two new flower samples. 
new_samples = np.array( - [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float) + [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32) y = list(classifier.predict(new_samples)) print("Predictions: {}".format(str(y))) -- GitLab From ddda9acc9b922a9983128fc2e47f3541b8e456bc Mon Sep 17 00:00:00 2001 From: Joe Yearsley Date: Sat, 21 Apr 2018 17:12:37 +0100 Subject: [PATCH 113/434] Update fold_old_batch_norms.cc Updated as requested --- tensorflow/tools/graph_transforms/fold_old_batch_norms.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index 988ba25e36..f1d361e07d 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -159,7 +159,7 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, NodeDef bias_add_node; bias_add_node.set_op("BiasAdd"); bias_add_node.set_name(conv_output_name); - if (HasAttr(conv_node, "data_format")) { + if (!conv_node.attr().count("data_format")) { CopyNodeAttr(conv_node, "data_format", "data_format", &bias_add_node); } CopyNodeAttr(conv_node, "T", "T", &bias_add_node); -- GitLab From 31dcaa089bb7e504b85807e9bdb96be2858f1b98 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Fri, 20 Apr 2018 18:31:39 -0700 Subject: [PATCH 114/434] [XLA][Doc]Fix up operation semantics of BatchNorm. We somehow committed an old version of the doc (see #, the lhs is what we wanted and the rhs is what got committed). This CL reverts last change to that CL. 
PiperOrigin-RevId: 193751762 --- .../performance/xla/operation_semantics.md | 60 ++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 8373a1219d..f530fe1206 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -25,7 +25,7 @@ Calculates gradients of batch norm. `BatchNormGrad(operand, scale, mean, variance, grad_output, epsilon, feature_index)` | Arguments | Type | Semantics | -| -------------- | ----------------------- | -------------------------------- | +| --------------- | ----------------------- | -------------------------------- | | `operand` | `ComputationDataHandle` | n dimensional array to be | : : : normalized (x) : | `scale` | `ComputationDataHandle` | 1 dimensional array | @@ -45,31 +45,37 @@ feature dimension in `operand`), the operation calculates the gradients with respect to `operand`, `offset` and `scale` across all the other dimensions. The `feature_index` must be a valid index for the feature dimension in `operand`. 
-The three gradients are defined by the following formulas (Assuming a -4-dimensional tensor as `operand` and (l) is the index for feature dimension): - -\\( coef_l = \frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (\nabla y_{ijkl} * (x_{ijkl} - \mu_l) / (\sigma^2_{l}+\epsilon)) \\) - -\\( \nabla x_{ijkl} = \gamma_{l} * (1/\sqrt{\sigma^2_{l}+\epsilon}) * [\nabla y_{ijkl} - mean(\nabla y) - (x_{ijkl} - \mu_{l}) * coef_l] \\) - -\\( \nabla \beta_l = \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl} \\) - -\\( \nabla \gamma_l = \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl} * ((x_{ijkl} - \mu_l) / \sqrt{\sigma^2_{l}+\epsilon}) \\) - -The inputs `mean` and `variance` represents moments value +The three gradients are defined by the following formulas (assuming a +4-dimensional tensor as `operand` and with feature dimension index \\(l\\), +batch size `m` and spatial sizes `w` and `h`): + +\\[ \begin{split} c_l&= +\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h +\left( \nabla y_{ijkl} \frac{x_{ijkl} - \mu_l}{\sigma^2_l+\epsilon} \right) +\\\\ +\nabla x_{ijkl} &= \frac{\gamma_{l}}{\sqrt{\sigma^2_{l}+\epsilon}} +\left( \nabla y_{ijkl} - \mathrm{mean}(\nabla y) - c_l (x_{ijkl} - \mu_{l}) +\right) +\\\\ +\nabla \gamma_l &= \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \left( \nabla y_{ijkl} +\frac{x_{ijkl} - \mu_l}{\sqrt{\sigma^2_{l}+\epsilon}} \right) +\\\\\ +\nabla \beta_l &= \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl} +\end{split} \\] + +The inputs `mean` and `variance` represent moments value across batch and spatial dimensions. 
The output type is a tuple of three handles: -|Outputs | Type | Semantics | -|------------- | ----------------------- | ------------------------------------ | -|`grad_operand`| `ComputationDataHandle` | gradient with respect to input | -: : : `operand` (\\( \nabla x\\)) : -|`grad_scale` | `ComputationDataHandle` | gradient with respect to input | -: : : `scale` (\\( \nabla \gamma\\)) : -|`grad_offset` | `ComputationDataHandle` | gradient with respect to input | -: : : `offset`(\\( \nabla \beta\\)) : - +| Outputs | Type | Semantics | +| ------------- | ----------------------- | --------------------------------- | +| `grad_operand` | `ComputationDataHandle` | gradient with respect to input | +: : : `operand` (\\( \nabla x\\)) : +| `grad_scale` | `ComputationDataHandle` | gradient with respect to input | +: : : `scale` (\\( \nabla \gamma\\)) : +| `grad_offset` | `ComputationDataHandle` | gradient with respect to input | +: : : `offset`(\\( \nabla \beta\\)) : ## BatchNormInference @@ -440,13 +446,11 @@ area and a computation is performed for each possible position of the window. | `lhs` | `ComputationDataHandle` | rank n+2 array of inputs | | `rhs` | `ComputationDataHandle` | rank n+2 array of kernel | : : : weights : -| `window_strides` | `ArraySlice` | size n array of kernel strides| -| `padding` | `ArraySlice` | n-d array of kernel strides | +| `padding` | `ArraySlice>` : padding : -| `lhs_dilation` | `ArraySlice` | size n lhs dilation factor | -: : : array | -| `rhs_dilation` | `ArraySlice` | size n rhs dilation factor -: : : array | +| `lhs_dilation` | `ArraySlice` | n-d lhs dilation factor array | +| `rhs_dilation` | `ArraySlice` | n-d rhs dilation factor array | Let n be the number of spatial dimensions. The `lhs` argument is a rank n+2 array describing the base area. This is called the input, even though of course -- GitLab From 2b5d4f794cc9c2740d27c0e8c1af2b511810e00b Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 20 Apr 2018 18:37:55 -0700 Subject: [PATCH 115/434] [XLA] Redesign: implement XlaComputation::Snapshot, and Client::LoadSnapshot. PiperOrigin-RevId: 193752146 --- tensorflow/compiler/xla/client/client.cc | 5 +++++ tensorflow/compiler/xla/client/client.h | 3 +++ tensorflow/compiler/xla/client/xla_client/BUILD | 2 +- .../compiler/xla/client/xla_client/xla_computation.cc | 11 +++++++++++ .../compiler/xla/client/xla_client/xla_computation.h | 4 ++++ tensorflow/compiler/xla/service/executable.cc | 6 +++--- tensorflow/compiler/xla/service/executable.h | 4 ++-- tensorflow/compiler/xla/service/hlo.proto | 2 +- 8 files changed, 30 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index f0f94298a0..328e1b8fa8 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -235,6 +235,11 @@ StatusOr Client::LoadSnapshot(const SessionModule& module) { return Computation(stub_, response.computation()); } +StatusOr Client::LoadSnapshot(const HloSnapshot& module) { + TF_RET_CHECK(module.has_hlo() && module.hlo().has_hlo_module()); + return XlaComputation(module.hlo().hlo_module()); +} + StatusOr> Client::Execute( const Computation& computation, tensorflow::gtl::ArraySlice arguments, diff --git a/tensorflow/compiler/xla/client/client.h b/tensorflow/compiler/xla/client/client.h index 14c685d94e..a63ff4c56d 100644 --- a/tensorflow/compiler/xla/client/client.h +++ b/tensorflow/compiler/xla/client/client.h @@ -255,6 +255,9 @@ class Client { StatusOr LoadSnapshot(const SessionModule& module); + // TODO(b/74197823): This is a part of a NOT YET ready refactor. 
+ StatusOr LoadSnapshot(const HloSnapshot& module); + ServiceInterface* stub() { return stub_; } private: diff --git a/tensorflow/compiler/xla/client/xla_client/BUILD b/tensorflow/compiler/xla/client/xla_client/BUILD index 31fa1241ee..0d6e207971 100644 --- a/tensorflow/compiler/xla/client/xla_client/BUILD +++ b/tensorflow/compiler/xla/client/xla_client/BUILD @@ -31,9 +31,9 @@ cc_library( hdrs = ["xla_computation.h"], deps = [ "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service:hlo_proto", - "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_computation.cc b/tensorflow/compiler/xla/client/xla_client/xla_computation.cc index a6752c6010..72e3935696 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_computation.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_computation.cc @@ -17,7 +17,9 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/util.h" namespace xla { @@ -26,4 +28,13 @@ StatusOr XlaComputation::GetProgramShape() const { return proto_.program_shape(); } +StatusOr> XlaComputation::Snapshot() const { + if (IsNull()) { + return InvalidArgument("Computation is invalid."); + } + auto session = MakeUnique(); + *session->mutable_hlo()->mutable_hlo_module() = proto_; + return std::move(session); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/client/xla_client/xla_computation.h b/tensorflow/compiler/xla/client/xla_client/xla_computation.h index 7ad212aa24..b70b57e9ff 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_computation.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_computation.h @@ -48,6 +48,10 @@ class XlaComputation { const HloModuleProto& proto() const { return proto_; } + // Requests that we snapshot the computation into a serializable protocol + 
// buffer form. + StatusOr> Snapshot() const; + // Returns true if this object is a null Computation. bool IsNull() const { return unique_id_ == -1; } diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc index 8218b5f7c8..be19b3ff04 100644 --- a/tensorflow/compiler/xla/service/executable.cc +++ b/tensorflow/compiler/xla/service/executable.cc @@ -163,9 +163,9 @@ Status Executable::DumpSessionModule() { result); } -/* static */ Status Executable::DumpToDirectory(const string& directory_path, - string filename, - const HloSession& hlo_session) { +/* static */ Status Executable::DumpToDirectory( + const string& directory_path, string filename, + const HloSnapshot& hlo_session) { tensorflow::Env* env = tensorflow::Env::Default(); if (!env->IsDirectory(directory_path).ok()) { // NB! CreateDir does not work reliably with multiple XLA threads -- two diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index bdbe119120..0c95f1a361 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -156,9 +156,9 @@ class Executable { static Status DumpToDirectory(const string& directory_path, string filename, const SessionModule& session_module); - // Dump hlo_session to directory_path/filename. + // Dump hlo snapshot to directory_path/filename. static Status DumpToDirectory(const string& directory_path, string filename, - const HloSession& hlo_session); + const HloSnapshot& hlo_session); protected: mutable tensorflow::mutex mutex_; diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 0c3eb7dcb4..aa6860880b 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -300,7 +300,7 @@ message HloProto { // Encapsulates HloProto together with the arguments, result, and // execution_platform. 
This message is used for purposes such as // analysis/replay/file-storage. -message HloSession { +message HloSnapshot { // The hlo graph. HloProto hlo = 1; -- GitLab From 1796d17b8b1fa598627a590fad0ef81d138af558 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 20 Apr 2018 20:11:19 -0700 Subject: [PATCH 116/434] Fix heuristic for computing gradients of gradients when there are outside_compilation clusters present, to stop creating cycles. PiperOrigin-RevId: 193757109 --- tensorflow/contrib/tpu/python/tpu/tpu.py | 38 +++++++----------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index a1690dadff..7b8786304c 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -173,36 +173,18 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext): # gradients, and put the gradient of X in cluster # 'root_cluster.gradient_uid'. # - # When the gradient code adds multiple Ops, it asks them to - # be colocated either with the original Op X, or with one of - # the preceding Ops that was added to the gradient. In other - # words, we want to detect the case where we are colocating - # with an Op that is in cluster root_cluster.gradient_uid - # and put the new Op in that same cluster if the - # gradient_uid is the same (the case that we are in the same - # invocation of gradients, and just adding new Ops to the - # cluster); and in a different cluster if the gradient_uids - # are different (the case that we are in a new invocation of - # gradients, taking the gradient of a previously-computed - # gradient). + # When taking a gradient of a gradient, some ops will be + # colocated with Op in the forward pass (e.g., cluster + # root_cluster) and some in the backward pass (e.g., cluster + # root_cluster.initial_gradient_uid). 
We need all of the + # grad-of-grad ops to be in the same cluster to avoid cyclic + # dependencies between clusters. We adopt a heuristic that + # puts any op clustered with root_cluster.xxx in + # root_cluster.gradient_uid, even if xxx was + # initial_gradient_uid. self._in_gradient_colocation = op parts = outside_attr.split(".") - if len(parts) > 1: - uid = parts[-1] - if uid == gradient_uid: - # Keep using the same cluster - cluster = outside_attr - else: - # We're taking the gradient of a gradient so make a new - # cluster attr, adding a new '.uid' on the end to - # preserve the invariant that the gradient_uid is the - # suffix after the last '.' in the attr. - cluster = outside_attr + "." + gradient_uid - else: - # We're taking the gradient of an Op in the forward pass, so - # make a new cluster combining the Op's cluster and the - # gradient id. - cluster = outside_attr + "." + gradient_uid + cluster = parts[0] + "." + gradient_uid self._EnterOutsideCompilationScope(cluster=cluster) except ValueError: # The attr was not present: do nothing. -- GitLab From 28b8a3c74f93f9238fa626ec7d32fbddcb56b0a8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 21 Apr 2018 08:16:47 -0700 Subject: [PATCH 117/434] Allow output has a different shape from input in the image.transform (#17011).
PiperOrigin-RevId: 193788768 --- tensorflow/contrib/image/kernels/image_ops.cc | 7 ++- tensorflow/contrib/image/kernels/image_ops.h | 2 +- tensorflow/contrib/image/ops/image_ops.cc | 54 +++++++++++++++++-- .../python/kernel_tests/image_ops_test.py | 30 +++++++++++ .../contrib/image/python/ops/image_ops.py | 39 ++++++++------ 5 files changed, 108 insertions(+), 24 deletions(-) diff --git a/tensorflow/contrib/image/kernels/image_ops.cc b/tensorflow/contrib/image/kernels/image_ops.cc index c2e32da133..ae4b1ba62a 100644 --- a/tensorflow/contrib/image/kernels/image_ops.cc +++ b/tensorflow/contrib/image/kernels/image_ops.cc @@ -70,6 +70,7 @@ class ImageProjectiveTransform : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& images_t = ctx->input(0); const Tensor& transform_t = ctx->input(1); + const Tensor& output_dim = ctx->input(2); OP_REQUIRES(ctx, images_t.shape().dims() == 4, errors::InvalidArgument("Input images must have rank 4")); OP_REQUIRES(ctx, @@ -83,7 +84,11 @@ class ImageProjectiveTransform : public OpKernel { auto images = images_t.tensor(); auto transform = transform_t.matrix(); Tensor* output_t; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, images_t.shape(), &output_t)); + // Image is NHWC format. 
+ auto output_shape = images_t.shape(); + output_shape.set_dim(1, output_dim.vec()(0)); + output_shape.set_dim(2, output_dim.vec()(1)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_t)); auto output = output_t->tensor(); (FillProjectiveTransform(interpolation_))( ctx->eigen_device(), &output, images, transform); diff --git a/tensorflow/contrib/image/kernels/image_ops.h b/tensorflow/contrib/image/kernels/image_ops.h index ad50133061..2320329b92 100644 --- a/tensorflow/contrib/image/kernels/image_ops.h +++ b/tensorflow/contrib/image/kernels/image_ops.h @@ -161,7 +161,7 @@ struct FillProjectiveTransform { void operator()(const Device& device, OutputType* output, const InputType& images, const TransformsType& transform) const { - output->device(device) = images.generate( + output->device(device) = output->generate( ProjectiveGenerator(images, transform, interpolation_)); } }; diff --git a/tensorflow/contrib/image/ops/image_ops.cc b/tensorflow/contrib/image/ops/image_ops.cc index 68771b3d05..e97267fb89 100644 --- a/tensorflow/contrib/image/ops/image_ops.cc +++ b/tensorflow/contrib/image/ops/image_ops.cc @@ -19,9 +19,55 @@ limitations under the License. namespace tensorflow { +using shape_inference::DimensionHandle; using shape_inference::InferenceContext; using shape_inference::ShapeHandle; +namespace { + +// Sets output[0] to shape [batch_dim,height,width,channel_dim], where +// height and width come from the size_tensor. +Status SetOutputToSizedImage(InferenceContext* c, DimensionHandle batch_dim, + int size_input_idx, DimensionHandle channel_dim) { + // Verify shape of size input. + ShapeHandle size; + TF_RETURN_IF_ERROR(c->WithRank(c->input(size_input_idx), 1, &size)); + DimensionHandle unused; + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(size, 0), 2, &unused)); + + // Get size values from the size tensor. 
+ const Tensor* size_tensor = c->input_tensor(size_input_idx); + DimensionHandle width; + DimensionHandle height; + if (size_tensor == nullptr) { + width = c->UnknownDim(); + height = c->UnknownDim(); + } else { + // TODO(petewarden) - Remove once we have constant evaluation in C++ only. + if (size_tensor->dtype() != DT_INT32) { + return errors::InvalidArgument( + "Bad size input type for SetOutputToSizedImage: Expected DT_INT32 " + "but got ", + DataTypeString(size_tensor->dtype()), " for input #", size_input_idx, + " in ", c->DebugString()); + } + auto vec = size_tensor->vec(); + height = c->MakeDim(vec(0)); + width = c->MakeDim(vec(1)); + } + c->set_output(0, c->MakeShape({batch_dim, height, width, channel_dim})); + return Status::OK(); +} + +Status ResizeShapeFn(InferenceContext* c) { + ShapeHandle input; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input)); + return SetOutputToSizedImage(c, c->Dim(input, 0), 2 /* size_input_idx */, + c->Dim(input, 3)); +} + +} // namespace + // TODO(ringwalt): Add a "fill_mode" argument with "constant", "mirror", etc. // TODO(ringwalt): Add a "fill_constant" argument for constant mode (default 0). // TODO(ringwalt): Add an "output_shape" argument. This is sufficient to @@ -29,13 +75,11 @@ using shape_inference::ShapeHandle; REGISTER_OP("ImageProjectiveTransform") .Input("images: dtype") .Input("transforms: float32") + .Input("output_shape: int32") .Attr("dtype: {uint8, int32, int64, float32, float64}") .Attr("interpolation: string") .Output("transformed_images: dtype") - .SetShapeFn([](InferenceContext* c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }) + .SetShapeFn(ResizeShapeFn) .Doc(R"doc( Applies the given transform to each of the images. @@ -49,7 +93,7 @@ If one row of `transforms` is `[a0, a1, a2, b0, b1, b2, c0, c1]`, then it maps the *output* point `(x, y)` to a transformed *input* point `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where `k = c0 x + c1 y + 1`. 
If the transformed point lays outside of the input -image, the output pixel is set to 0. The output is the same size as the input, +image, the output pixel is set to 0. images: 4D `Tensor`, input image(s) in NHWC format. transforms: 2D `Tensor`, projective transform(s) to apply to the image(s). diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index b50177ae56..c0151d320f 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -195,10 +195,40 @@ class ImageOpsTest(test_util.TensorFlowTestCase): x_init_value=test_image) self.assertLess(left_err, 1e-10) + def _test_grad_different_shape(self, input_shape, output_shape): + with self.test_session(): + test_image_shape = input_shape + test_image = np.random.randn(*test_image_shape) + test_image_tensor = constant_op.constant( + test_image, shape=test_image_shape) + test_transform = image_ops.angles_to_projective_transforms( + np.pi / 2, 4, 4) + + if len(output_shape) == 2: + resize_shape = output_shape + elif len(output_shape) == 3: + resize_shape = output_shape[0:2] + elif len(output_shape) == 4: + resize_shape = output_shape[1:3] + output = image_ops.transform( + images=test_image_tensor, + transforms=test_transform, + output_shape=resize_shape) + left_err = gradient_checker.compute_gradient_error( + test_image_tensor, + test_image_shape, + output, + output_shape, + x_init_value=test_image) + self.assertLess(left_err, 1e-10) + def test_grad(self): self._test_grad([16, 16]) self._test_grad([4, 12, 12]) self._test_grad([3, 4, 12, 12]) + self._test_grad_different_shape([16, 16], [8, 8]) + self._test_grad_different_shape([4, 12, 3], [8, 24, 3]) + self._test_grad_different_shape([3, 4, 12, 3], [3, 8, 24, 3]) class BipartiteMatchTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/contrib/image/python/ops/image_ops.py 
b/tensorflow/contrib/image/python/ops/image_ops.py index c139ae89d8..a8d8cf8c5c 100644 --- a/tensorflow/contrib/image/python/ops/image_ops.py +++ b/tensorflow/contrib/image/python/ops/image_ops.py @@ -212,7 +212,11 @@ def translations_to_projective_transforms(translations, name=None): axis=1) -def transform(images, transforms, interpolation="NEAREST", name=None): +def transform(images, + transforms, + interpolation="NEAREST", + output_shape=None, + name=None): """Applies the given transform(s) to the image(s). Args: @@ -229,6 +233,10 @@ def transform(images, transforms, interpolation="NEAREST", name=None): the transform mapping input points to output points. Note that gradients are not backpropagated into transformation parameters. interpolation: Interpolation mode. Supported values: "NEAREST", "BILINEAR". + output_shape: Output dimension after the transform, [height, width]. + If None, output is the same size as input image. + + name: The name of the op. Returns: Image(s) with the same type and shape as `images`, with the given @@ -255,6 +263,13 @@ def transform(images, transforms, interpolation="NEAREST", name=None): else: raise TypeError("Images should have rank between 2 and 4.") + if output_shape is None: + output_shape = array_ops.shape(images)[1:3] + elif len(output_shape) != 2: + raise TypeError( + "output_shape must either be None or a vector of 2 elements.
%s" % + str(output_shape)) + if len(transform_or_transforms.get_shape()) == 1: transforms = transform_or_transforms[None] elif transform_or_transforms.get_shape().ndims is None: @@ -265,7 +280,7 @@ def transform(images, transforms, interpolation="NEAREST", name=None): else: raise TypeError("Transforms should have rank 1 or 2.") output = gen_image_ops.image_projective_transform( - images, transforms, interpolation=interpolation.upper()) + images, transforms, output_shape, interpolation=interpolation.upper()) if len(image_or_images.get_shape()) == 2: return output[0, :, :, 0] elif len(image_or_images.get_shape()) == 3: @@ -375,14 +390,6 @@ def _image_projective_transform_grad(op, grad): if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES: raise TypeError("Invalid dtype %s." % image_or_images.dtype) - if len(image_or_images.get_shape()) == 2: - images = image_or_images[None, :, :, None] - elif len(image_or_images.get_shape()) == 3: - images = image_or_images[None, :, :, :] - elif len(image_or_images.get_shape()) == 4: - images = image_or_images - else: - raise TypeError("Images should have rank between 2 and 4") if len(transform_or_transforms.get_shape()) == 1: transforms = transform_or_transforms[None] elif len(transform_or_transforms.get_shape()) == 2: @@ -395,13 +402,11 @@ def _image_projective_transform_grad(op, grad): inverse = linalg_ops.matrix_inverse(transforms) transforms = matrices_to_flat_transforms(inverse) output = gen_image_ops.image_projective_transform( - grad, transforms, interpolation=interpolation) - if len(image_or_images.get_shape()) == 2: - return [output[0, :, :, 0], None] - elif len(image_or_images.get_shape()) == 3: - return [output[0, :, :, :], None] - else: - return [output, None] + images=grad, + transforms=transforms, + output_shape=array_ops.shape(image_or_images)[1:3], + interpolation=interpolation) + return [output, None, None] def bipartite_match(distance_mat, -- GitLab From fe4146d884c8805fceaa6d73d0bcc7fbf21df7cd Mon Sep 17 
00:00:00 2001 From: Yong Tang Date: Sat, 21 Apr 2018 18:42:03 +0000 Subject: [PATCH 118/434] Update .gitignore for cmake generated files After running cmake on Linux with: ``` tensorflow/tools/ci_build/ci_build.sh CMAKE tensorflow/tools/ci_build/builds/cmake.sh ``` the following file is left: ``` ubuntu@ubuntu:~/tensorflow$ git status On branch master Your branch is up-to-date with 'origin/master'. Untracked files: (use "git add ..." to include in what will be committed) api_init_files_list.txt nothing added to commit but untracked files present (use "git add" to track) ubuntu@ubuntu:~/tensorflow$ ``` This fix updates the .gitignore file so that cmake generated files is not added with git inadvertently. Signed-off-by: Yong Tang --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index be75938ec4..828bbe9bd3 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ Podfile.lock /tensorflow/contrib/lite/examples/ios/simple/data/*.txt /tensorflow/contrib/lite/examples/ios/simple/data/*.tflite xcuserdata/** +/api_init_files_list.txt # Android .gradle -- GitLab From 8f558d67450f3ec6aa0d96af9fad84042d6b79df Mon Sep 17 00:00:00 2001 From: AG Ramesh Date: Sat, 21 Apr 2018 15:25:37 -0700 Subject: [PATCH 119/434] Changed calls to the depreacted StringPiece::contains with str_util::StrContains --- tensorflow/core/graph/mkl_layout_pass.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 5368774f2d..72a13d4da7 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -547,14 +547,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // If Op has been specifically assigned to a non-CPU device, then No. 
if (!n->assigned_device_name().empty() && - !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) { + !str_util::StrContains(n->assigned_device_name(),kCPUDeviceSubStr)) { result = false; reason = "Op has been assigned a runtime device that is not CPU."; } // If user has specifically assigned this op to a non-CPU device, then No. if (!n->def().device().empty() && - !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) { + !str_util::StrContains(n->def().device(),kCPUDeviceSubStr)) { result = false; reason = "User has assigned a device that is not CPU."; } @@ -2691,14 +2691,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // If Op has been specifically assigned to a non-CPU device, then No. if (!n->assigned_device_name().empty() && - !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) { + !str_util::StrContains(n->assigned_device_name(),kCPUDeviceSubStr)) { result = false; reason = "Op has been assigned a runtime device that is not CPU."; } // If user has specifically assigned this op to a non-CPU device, then No. if (!n->def().device().empty() && - !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) { + !str_util::StrContains(n->def().device(),kCPUDeviceSubStr)) { result = false; reason = "User has assigned a device that is not CPU."; } -- GitLab From 5518db48074c3bd125089bccc3edec03c192bf56 Mon Sep 17 00:00:00 2001 From: Bryan Heden Date: Sat, 21 Apr 2018 19:45:42 -0500 Subject: [PATCH 120/434] update $ source spacing When viewing install_linux, the spacing was off for 'Next Steps' section. 
--- tensorflow/docs_src/install/install_linux.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 1a349f5412..02af21bcf2 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -231,7 +231,7 @@ Note that you must activate the Virtualenv environment each time you use TensorFlow. If the Virtualenv environment is not currently active, invoke one of the following commands: -
 $ source ~/tensorflow/bin/activate      # bash, sh, ksh, or zsh
+
$ source ~/tensorflow/bin/activate      # bash, sh, ksh, or zsh
 $ source ~/tensorflow/bin/activate.csh  # csh or tcsh
When the Virtualenv environment is active, you may run -- GitLab From 5b7b354efe3eff5756623b04b87b4cd5272f82cc Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sat, 21 Apr 2018 21:37:48 -0700 Subject: [PATCH 121/434] [XLA] Add an option to the CSE pass to ignore non-fusion computations PiperOrigin-RevId: 193814728 --- tensorflow/compiler/xla/service/hlo_cse.cc | 4 ++++ tensorflow/compiler/xla/service/hlo_cse.h | 11 +++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_cse.cc b/tensorflow/compiler/xla/service/hlo_cse.cc index cd7cbbdd71..3b22c93733 100644 --- a/tensorflow/compiler/xla/service/hlo_cse.cc +++ b/tensorflow/compiler/xla/service/hlo_cse.cc @@ -97,6 +97,10 @@ StatusOr HloCSE::Run(HloModule* module) { const std::function eq_computations = std::equal_to(); for (auto* computation : module->computations()) { + if (only_fusion_computations_ && !computation->IsFusionComputation()) { + continue; + } + changed |= CombineConstants(computation, is_layout_sensitive_); std::list post_order = diff --git a/tensorflow/compiler/xla/service/hlo_cse.h b/tensorflow/compiler/xla/service/hlo_cse.h index 70096e07a2..5e2b348bdd 100644 --- a/tensorflow/compiler/xla/service/hlo_cse.h +++ b/tensorflow/compiler/xla/service/hlo_cse.h @@ -29,9 +29,11 @@ class HloCSE : public HloPassInterface { public: // If is_layout_sensitive is true, then the simplifier preserves layout during // transformation. Otherwise, layout is ignored. - explicit HloCSE(bool is_layout_sensitive) - : is_layout_sensitive_(is_layout_sensitive) {} - ~HloCSE() override {} + explicit HloCSE(bool is_layout_sensitive, + bool only_fusion_computations = false) + : is_layout_sensitive_(is_layout_sensitive), + only_fusion_computations_(only_fusion_computations) {} + ~HloCSE() override = default; tensorflow::StringPiece name() const override { return "cse"; } // Run CSE on the given module. 
Returns whether the module was changed (common @@ -39,7 +41,8 @@ class HloCSE : public HloPassInterface { StatusOr Run(HloModule* module) override; private: - bool is_layout_sensitive_; + const bool is_layout_sensitive_; + const bool only_fusion_computations_; }; } // namespace xla -- GitLab From 292d9b92c93e97e98284787a1a60c30553fee5cb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 22 Apr 2018 07:13:16 -0700 Subject: [PATCH 122/434] Fixed typo in crossed column code snippet. PiperOrigin-RevId: 193838865 --- tensorflow/docs_src/get_started/feature_columns.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/get_started/feature_columns.md b/tensorflow/docs_src/get_started/feature_columns.md index d8e4bec863..9c777a0077 100644 --- a/tensorflow/docs_src/get_started/feature_columns.md +++ b/tensorflow/docs_src/get_started/feature_columns.md @@ -364,7 +364,7 @@ def make_dataset(latitude, longitude, labels): return tf.data.Dataset.from_tensor_slices((features, labels)) -# Bucketize the latitude and longitude usig the `edges` +# Bucketize the latitude and longitude using the `edges` latitude_bucket_fc = tf.feature_column.bucketized_column( tf.feature_column.numeric_column('latitude'), list(atlanta.latitude.edges)) -- GitLab From e1722aa3197b3942add6b9fb78ed50e21af693ff Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 22 Apr 2018 07:29:33 -0700 Subject: [PATCH 123/434] Multi-thread implementation of ExperimentalShuffledFullyConnected using the gemmlowp threadpool. 
PiperOrigin-RevId: 193839485 --- .../internal/optimized/optimized_ops.h | 146 +++++++++++++----- 1 file changed, 111 insertions(+), 35 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index d269056800..2e2721e093 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1203,39 +1203,16 @@ void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, output_activation_max, output_data, output_dims, gemm_context); } -inline void ExperimentalShuffledFullyConnected( - const uint8* input_data, const Dims<4>& input_dims, - const uint8* shuffled_weights_data, const Dims<4>& weights_dims, - const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, - int output_shift, int32 output_activation_min, int32 output_activation_max, - int16* output_data, const Dims<4>& output_dims, - gemmlowp::GemmContext* gemm_context) { - gemmlowp::ScopedProfilingLabel label( - "ExperimentalShuffledFullyConnected/8bit"); - (void)gemm_context; // only used in optimized code. - TFLITE_DCHECK_EQ(output_activation_min, -32768); - TFLITE_DCHECK_EQ(output_activation_max, 32767); - // TODO(benoitjacob): This really should be: - // const int batches = ArraySize(output_dims, 1); - // but the current --variable_batch hack consists in overwriting the 3rd - // dimension with the runtime batch size, as we don't keep track for each - // array of which dimension is the batch dimension in it. 
- const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * - ArraySize(output_dims, 3); - const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0); - const int accum_depth = ArraySize(weights_dims, 0); - TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); - TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); - // The experimental shuffling is an optimization for matrix*vector product. - // We aren't interested in supporting non-matrix*vector-product cases, i.e. - // batches>1. - TFLITE_DCHECK_EQ(batches, 1); - // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) - // so that just reinterpreting them as int8 values is equivalent to - // subtracting 128 from them, thus implementing for free the subtraction of - // the zero_point value 128. - const int8* shuffled_weights_ptr = - reinterpret_cast(shuffled_weights_data); +// Internal function doing the actual arithmetic work for +// ExperimentalShuffledFullyConnected. +// May be called either directly by it (single-threaded case) or may be used +// as the 'task' for worker threads to run (multi-threaded case, see +// ExperimentalShuffledFullyConnectedWorkerTask below). +inline void ExperimentalShuffledFullyConnectedWorkerImpl( + const uint8* input_data, const int8* shuffled_weights_data, + int output_depth, int accum_depth, const int32* bias_data, + int32 output_multiplier, int output_shift, int16* output_data) { + const int8* shuffled_weights_ptr = shuffled_weights_data; #if defined USE_NEON // We'll only need to xor signbit to the input activation values, as // that xor-ing is pre-built into the shuffled weights values. @@ -1331,14 +1308,113 @@ inline void ExperimentalShuffledFullyConnected( acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, -output_shift); // Saturate, cast to int16, and store to output array. 
- acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); + acc = std::max(acc, -32768); + acc = std::min(acc, 32767); output_data[c + i] = acc; } } #endif } +// Wraps ExperimentalShuffledFullyConnectedWorkerImpl into a Task class +// to allow using gemmlowp's threadpool. +struct ExperimentalShuffledFullyConnectedWorkerTask : gemmlowp::Task { + ExperimentalShuffledFullyConnectedWorkerTask( + const uint8* input_data, const int8* shuffled_weights_data, + int output_depth, int accum_depth, const int32* bias_data, + int32 output_multiplier, int output_shift, int16* output_data) + : input_data_(input_data), + shuffled_weights_data_(shuffled_weights_data), + output_depth_(output_depth), + accum_depth_(accum_depth), + bias_data_(bias_data), + output_multiplier_(output_multiplier), + output_shift_(output_shift), + output_data_(output_data) {} + + void Run() override { + ExperimentalShuffledFullyConnectedWorkerImpl( + input_data_, shuffled_weights_data_, output_depth_, accum_depth_, + bias_data_, output_multiplier_, output_shift_, output_data_); + } + + const uint8* input_data_; + const int8* shuffled_weights_data_; + int output_depth_; + int accum_depth_; + const int32* bias_data_; + int32 output_multiplier_; + int output_shift_; + int16* output_data_; +}; + +inline void ExperimentalShuffledFullyConnected( + const uint8* input_data, const Dims<4>& input_dims, + const uint8* shuffled_weights_data, const Dims<4>& weights_dims, + const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, + int output_shift, int32 output_activation_min, int32 output_activation_max, + int16* output_data, const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + gemmlowp::ScopedProfilingLabel label( + "ExperimentalShuffledFullyConnected/8bit"); + (void)gemm_context; // only used in optimized code. 
+ TFLITE_DCHECK_EQ(output_activation_min, -32768); + TFLITE_DCHECK_EQ(output_activation_max, 32767); + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3); + const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0); + const int accum_depth = ArraySize(weights_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); + // The experimental shuffling is an optimization for matrix*vector product. + // We aren't interested in supporting non-matrix*vector-product cases, i.e. + // batches>1. + TFLITE_DCHECK_EQ(batches, 1); + // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) + // so that just reinterpreting them as int8 values is equivalent to + // subtracting 128 from them, thus implementing for free the subtraction of + // the zero_point value 128. + const int8* int8_shuffled_weights_data = + reinterpret_cast(shuffled_weights_data); + + // Our GEMV kernel has 4 rows. This doesn't matter in practice for GEMV + // shapes, gemmlowp::HowManyThreads only takes that parameter because it + // matters for other kinds of GEMM shapes. 
+ static constexpr int kKernelRows = 4; + const int thread_count = gemmlowp::HowManyThreads( + gemm_context->max_num_threads(), output_depth, 1, accum_depth); + if (thread_count == 1) { + // Single-thread case: do the computation on the current thread, don't + // use a threadpool + ExperimentalShuffledFullyConnectedWorkerImpl( + input_data, int8_shuffled_weights_data, output_depth, accum_depth, + bias_data, output_multiplier, output_shift, output_data); + return; + } + + // Multi-threaded case: use the gemmlowp context's threadpool. + TFLITE_DCHECK_GT(thread_count, 1); + std::vector tasks(thread_count); + const int kRowsPerWorker = + gemmlowp::RoundUp(output_depth / thread_count); + int row_start = 0; + for (int i = 0; i < thread_count; i++) { + int row_end = std::min(output_depth, row_start + kRowsPerWorker); + tasks[i] = new ExperimentalShuffledFullyConnectedWorkerTask( + input_data, int8_shuffled_weights_data + row_start * accum_depth, + row_end - row_start, accum_depth, bias_data + row_start, + output_multiplier, output_shift, output_data + row_start); + row_start = row_end; + } + TFLITE_DCHECK_EQ(row_start, output_depth); + gemm_context->workers_pool()->Execute(tasks); +} + template inline void ExtractPatchIntoBufferColumn( const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth, -- GitLab From bfffd2041106dac5b7bb3efcbb311a20505ac61f Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 14:43:21 +0000 Subject: [PATCH 124/434] Update docs to add note and examples for tf.count_nonzero with string Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 31ce83905b..30ac001c25 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1466,9 +1466,18 @@ def count_nonzero(input_tensor, tf.count_nonzero(x, [0, 1]) # 3 ``` + **NOTE** Strings are 
compared against zero-length empty string `""`. Any + string with a size greater than zero is already considered as nonzero. + + For example: + ```python + x = tf.constant(["", "a", " ", "b", ""]) + tf.count_nonzero(x) # 3, with "a", " ", and "b" as nonzero strings. + ``` + Args: - input_tensor: The tensor to reduce. Should be of numeric type, `string`, - or `bool`. + input_tensor: The tensor to reduce. Should be of numeric type, `bool`, + or `string`. axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. -- GitLab From 522e20ef9cff8a7a49322c6442d940aa556222c0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 22 Apr 2018 09:15:38 -0700 Subject: [PATCH 125/434] Change refs/unrefs of FLR. PiperOrigin-RevId: 193843055 --- tensorflow/core/common_runtime/function.cc | 52 ++++++++++--------- .../core/common_runtime/function_test.cc | 27 ++-------- .../function_threadpool_test.cc | 14 +---- .../process_function_library_runtime.cc | 21 +------- .../process_function_library_runtime.h | 3 -- .../process_function_library_runtime_test.cc | 10 ++-- 6 files changed, 38 insertions(+), 89 deletions(-) diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index d310520ebd..a6f637b488 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -209,6 +209,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime { // The instantiated and transformed function is encoded as a Graph // object, and an executor is created for the graph. struct Item : public core::RefCounted { + bool invalidated = false; const Graph* graph = nullptr; // Owned by exec. const FunctionLibraryDefinition* overlay_lib = nullptr; // Not owned. 
FunctionBody* func_graph = nullptr; @@ -284,15 +285,7 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl( } FunctionLibraryRuntimeImpl::~FunctionLibraryRuntimeImpl() { - // The most common patterns of FLR usage don't require the caller to - // explicitly release handles. As a result, we try to unref each item until - // it's erased. - for (auto item : items_) { - if (item.second) { - while (!item.second->Unref()) { - } - } - } + for (auto p : items_) p.second->Unref(); } // An asynchronous op kernel which executes an instantiated function @@ -497,24 +490,30 @@ Status FunctionLibraryRuntimeImpl::Instantiate( options_copy.target = device_name_; const string key = Canonicalize(function_name, attrs, options_copy); + Handle found_handle = kInvalidHandle; { mutex_lock l(mu_); - *handle = parent_->GetHandle(key); - if (*handle != kInvalidHandle) { + found_handle = parent_->GetHandle(key); + if (found_handle != kInvalidHandle) { FunctionLibraryRuntime::LocalHandle handle_on_device = - parent_->GetHandleOnDevice(device_name_, *handle); + parent_->GetHandleOnDevice(device_name_, found_handle); if (handle_on_device == kInvalidLocalHandle) { return errors::Internal("LocalHandle not found for handle ", *handle, "."); } - auto item_handle = items_.find(handle_on_device); - if (item_handle == items_.end()) { + auto iter = items_.find(handle_on_device); + if (iter == items_.end()) { return errors::Internal("LocalHandle ", handle_on_device, - " for handle ", *handle, + " for handle ", found_handle, " not found in items."); } - item_handle->second->Ref(); - return Status::OK(); + Item* item = iter->second; + if (!item->invalidated) { + *handle = found_handle; + return Status::OK(); + } + // *item is invalidated. Fall through and instantiate the given + // function_name/attrs/option again. 
} } @@ -546,10 +545,10 @@ Status FunctionLibraryRuntimeImpl::Instantiate( { mutex_lock l(mu_); - *handle = parent_->GetHandle(key); - if (*handle != kInvalidHandle) { + Handle found_handle_again = parent_->GetHandle(key); + if (found_handle_again != found_handle) { delete fbody; - items_[parent_->GetHandleOnDevice(device_name_, *handle)]->Ref(); + *handle = found_handle_again; } else { *handle = parent_->AddHandle(key, device_name_, next_handle_); Item* item = new Item; @@ -566,16 +565,12 @@ Status FunctionLibraryRuntimeImpl::ReleaseHandle(Handle handle) { if (!parent_->IsInstantiatedOnDevice(device_name_, handle)) { return parent_->ReleaseHandle(handle); } - LocalHandle h = parent_->GetHandleOnDevice(device_name_, handle); CHECK_NE(h, kInvalidLocalHandle); mutex_lock l(mu_); CHECK_EQ(1, items_.count(h)); Item* item = items_[h]; - if (item->Unref()) { - items_.erase(h); - TF_RETURN_IF_ERROR(parent_->RemoveHandle(handle)); - } + item->invalidated = true; // Reinstantiate later. return Status::OK(); } @@ -736,6 +731,7 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle, // computation is done and stored in *rets, we send the return values back // to the source_device (caller) so that the ProcFLR can receive them later. 
std::vector* remote_args = new std::vector; + item->Ref(); ProcessFunctionLibraryRuntime::ReceiveTensorsAsync( source_device, target_device, "arg_", src_incarnation, args.size(), device_context, {}, rendezvous, remote_args, @@ -747,6 +743,7 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle, s = frame->SetArgs(*remote_args); } if (!s.ok()) { + item->Unref(); delete frame; delete remote_args; delete exec_args; @@ -757,6 +754,7 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle, *exec_args, [item, frame, rets, done, source_device, target_device, target_incarnation, rendezvous, device_context, remote_args, exec_args](const Status& status) { + core::ScopedUnref unref(item); Status s = status; if (s.ok()) { s = frame->ConsumeRetvals(rets); @@ -842,11 +840,13 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle, return; } + item->Ref(); item->exec->RunAsync( // Executor args *exec_args, // Done callback. [item, frame, rets, done, exec_args](const Status& status) { + core::ScopedUnref unref(item); Status s = status; if (s.ok()) { s = frame->ConsumeRetvals(rets); @@ -906,6 +906,7 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle, exec_args->runner = *run_opts.runner; exec_args->call_frame = frame; + item->Ref(); item->exec->RunAsync( // Executor args *exec_args, @@ -914,6 +915,7 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle, [item, frame, exec_args](DoneCallback done, // Start unbound arguments. 
const Status& status) { + core::ScopedUnref unref(item); delete exec_args; done(status); }, diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc index 61b2f0e60f..373fc64007 100644 --- a/tensorflow/core/common_runtime/function_test.cc +++ b/tensorflow/core/common_runtime/function_test.cc @@ -231,19 +231,8 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { return status; } FunctionLibraryRuntime::Options opts; - status = Run(flr, handle, opts, args, rets, add_runner); - if (!status.ok()) return status; - - // Release the handle and try running again. It should not succeed. - status = flr->ReleaseHandle(handle); - if (!status.ok()) return status; - - Status status2 = Run(flr, handle, opts, args, std::move(rets)); - EXPECT_TRUE(errors::IsInvalidArgument(status2)); - EXPECT_TRUE( - str_util::StrContains(status2.error_message(), "remote execution.")); - - return status; + TF_RETURN_IF_ERROR(Run(flr, handle, opts, args, rets, add_runner)); + return flr->ReleaseHandle(handle); } Status Run(FunctionLibraryRuntime* flr, FunctionLibraryRuntime::Handle handle, @@ -304,16 +293,8 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { *rets[i] = retvals[i]; } - // Release the handle and try running again. It should not succeed. - status = flr->ReleaseHandle(handle); - if (!status.ok()) return status; - - Status status2 = Run(flr, handle, opts, args, std::move(rets)); - EXPECT_TRUE(errors::IsInvalidArgument(status2)); - EXPECT_TRUE( - str_util::StrContains(status2.error_message(), "remote execution.")); - - return status; + // Release the handle. 
+ return flr->ReleaseHandle(handle); } std::unique_ptr GetFuncBody(FunctionLibraryRuntime* flr, diff --git a/tensorflow/core/common_runtime/function_threadpool_test.cc b/tensorflow/core/common_runtime/function_threadpool_test.cc index 2d09e83d01..98dac38a8c 100644 --- a/tensorflow/core/common_runtime/function_threadpool_test.cc +++ b/tensorflow/core/common_runtime/function_threadpool_test.cc @@ -144,19 +144,7 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { return status; } FunctionLibraryRuntime::Options opts; - status = Run(flr, handle, opts, args, rets, add_runner); - if (!status.ok()) return status; - - // Release the handle and try running again. It should not succeed. - status = flr->ReleaseHandle(handle); - if (!status.ok()) return status; - - Status status2 = Run(flr, handle, opts, args, std::move(rets)); - EXPECT_TRUE(errors::IsInvalidArgument(status2)); - EXPECT_TRUE( - str_util::StrContains(status2.error_message(), "remote execution.")); - - return status; + return Run(flr, handle, opts, args, std::move(rets), add_runner); } Status Run(FunctionLibraryRuntime* flr, FunctionLibraryRuntime::Handle handle, diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index d05f146f21..e61ed8c479 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -181,12 +181,7 @@ FunctionLibraryRuntime::Handle ProcessFunctionLibraryRuntime::AddHandle( const string& function_key, const string& device_name, FunctionLibraryRuntime::LocalHandle local_handle) { mutex_lock l(mu_); - FunctionLibraryRuntime::Handle h = - gtl::FindWithDefault(table_, function_key, kInvalidHandle); - if (h != kInvalidHandle) { - if (function_data_.count(h) != 0) return h; - } - h = next_handle_; + auto h = next_handle_; FunctionData* fd = new FunctionData(device_name, local_handle); 
function_data_[h] = std::unique_ptr(fd); table_[function_key] = h; @@ -197,12 +192,7 @@ FunctionLibraryRuntime::Handle ProcessFunctionLibraryRuntime::AddHandle( FunctionLibraryRuntime::Handle ProcessFunctionLibraryRuntime::GetHandle( const string& function_key) const { mutex_lock l(mu_); - FunctionLibraryRuntime::Handle h = - gtl::FindWithDefault(table_, function_key, kInvalidHandle); - if (h != kInvalidHandle) { - if (function_data_.count(h) == 0) return kInvalidHandle; - } - return h; + return gtl::FindWithDefault(table_, function_key, kInvalidHandle); } bool ProcessFunctionLibraryRuntime::IsInstantiatedOnDevice( @@ -272,13 +262,6 @@ Status ProcessFunctionLibraryRuntime::Instantiate( return Status::OK(); } -Status ProcessFunctionLibraryRuntime::RemoveHandle( - FunctionLibraryRuntime::Handle handle) { - mutex_lock l(mu_); - function_data_.erase(handle); - return Status::OK(); -} - Status ProcessFunctionLibraryRuntime::ReleaseHandle( FunctionLibraryRuntime::Handle handle) { FunctionLibraryRuntime* flr = nullptr; diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.h b/tensorflow/core/common_runtime/process_function_library_runtime.h index c7b8259f78..05e5770899 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.h +++ b/tensorflow/core/common_runtime/process_function_library_runtime.h @@ -134,9 +134,6 @@ class ProcessFunctionLibraryRuntime { // of the device where the function is registered. string GetDeviceName(FunctionLibraryRuntime::Handle handle); - // Removes handle from the state owned by this object. 
- Status RemoveHandle(FunctionLibraryRuntime::Handle handle); - Status Clone(Env* env, int graph_def_version, const OptimizerOptions& optimizer_options, CustomKernelCreator custom_kernel_creator, diff --git a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc index 4fbf2abc67..cc10e77ad2 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc @@ -119,12 +119,13 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { EXPECT_GE(call_count, 1); // Test runner is used. - // Release the handle and then try running the function. It shouldn't - // succeed. + // Release the handle and then try running the function. It + // should still succeed. status = proc_flr_->ReleaseHandle(handle); if (!status.ok()) { return status; } + Notification done2; proc_flr_->Run(opts, handle, args, &out, [&status, &done2](const Status& s) { @@ -132,10 +133,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { done2.Notify(); }); done2.WaitForNotification(); - EXPECT_TRUE(errors::IsNotFound(status)); - EXPECT_TRUE(str_util::StrContains(status.error_message(), "not found.")); - - return Status::OK(); + return status; } std::vector devices_; -- GitLab From d481f07549470b4a03b41f9bb588d7f7ddc85082 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Sun, 22 Apr 2018 09:26:15 -0700 Subject: [PATCH 126/434] Remove proto header include in core/kernels. 
The goal is to make kernels mostly independent of proto headers, which will let us lock down our .so import PiperOrigin-RevId: 193843351 --- .../remote_fused_graph_execute_info.proto | 8 ---- tensorflow/core/kernels/BUILD | 1 + .../hexagon/hexagon_control_wrapper.cc | 1 + .../hexagon/hexagon_graph_execution_test.cc | 1 + .../kernels/i_remote_fused_graph_executor.h | 4 +- .../remote_fused_graph_execute_utils.cc | 46 +++++++++---------- .../remote_fused_graph_execute_utils.h | 28 +++++++---- .../remote_fused_graph_execute_utils_test.cc | 1 + ...ote_fused_graph_rewriter_transform_test.cc | 1 + tensorflow/core/kernels/summary_interface.h | 5 +- tensorflow/core/kernels/summary_kernels.cc | 1 + 11 files changed, 52 insertions(+), 45 deletions(-) diff --git a/tensorflow/core/framework/remote_fused_graph_execute_info.proto b/tensorflow/core/framework/remote_fused_graph_execute_info.proto index 389a08ac2f..946da40d0e 100644 --- a/tensorflow/core/framework/remote_fused_graph_execute_info.proto +++ b/tensorflow/core/framework/remote_fused_graph_execute_info.proto @@ -14,14 +14,6 @@ import "tensorflow/core/framework/types.proto"; // not valid across executions, but can be serialized back and forth from within // a single run. 
message RemoteFusedGraphExecuteInfo { - enum NodeType { - UNUSED = 0; - GRAPH_INPUT = 1; - GRAPH_OUTPUT = 2; - FUSED_NODE = 3; - BORDER_INPUT = 4; - BORDER_OUTPUT = 5; - } message TensorShapeTypeProto { DataType dtype = 1; diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 7ef15da143..f7f6a9b505 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5925,6 +5925,7 @@ tf_cc_test( "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", "//tensorflow/core:test", "//tensorflow/core:test_main", diff --git a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc index 66d24d171d..3810cbe5b5 100644 --- a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc +++ b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h" #include "tensorflow/core/framework/graph_transfer_info.pb.h" +#include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h" #include "tensorflow/core/kernels/hexagon/soc_interface.h" diff --git a/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc b/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc index 5fb6b9247f..d53977703e 100644 --- a/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc +++ b/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc @@ -30,6 +30,7 @@ adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp #include #include "tensorflow/core/framework/graph_transfer_info.pb.h" +#include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h" diff --git a/tensorflow/core/kernels/i_remote_fused_graph_executor.h b/tensorflow/core/kernels/i_remote_fused_graph_executor.h index eb6b64da58..6072412689 100644 --- a/tensorflow/core/kernels/i_remote_fused_graph_executor.h +++ b/tensorflow/core/kernels/i_remote_fused_graph_executor.h @@ -16,13 +16,15 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_KERNELS_I_REMOTE_GRAPH_EXECUTOR_H_ #define TENSORFLOW_CORE_KERNELS_I_REMOTE_GRAPH_EXECUTOR_H_ -#include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/platform/macros.h" namespace tensorflow { +class GraphDef; +class RemoteFusedGraphExecuteInfo; + class IRemoteFusedGraphExecutor { public: using TensorAllocatorFunc = std::function; diff --git a/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc b/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc index e2709c117d..cc4d9a49a0 100644 --- a/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc +++ b/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc @@ -20,7 +20,9 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/shape_refiner.h" +#include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/graph/algorithm.h" @@ -1125,46 +1127,43 @@ RemoteFusedGraphExecuteUtils::BuildRemoteFusedGraphExecuteOpNode( for (size_t i = 0; i < inputs.size(); ++i) { if (IsSameNodeName(node_def, inputs.at(i), &tid)) { AppendDeliminator(&attr_str); - attr_str += BuildNodeTypeAttr(RemoteFusedGraphExecuteInfo::GRAPH_INPUT, - tid.second, i, remote_graph_executor_name, + attr_str += BuildNodeTypeAttr(GRAPH_INPUT, tid.second, i, + remote_graph_executor_name, remote_fused_graph_node_name); } } for (size_t i = 0; i < outputs.size(); ++i) { if (IsSameNodeName(node_def, outputs.at(i), &tid)) { AppendDeliminator(&attr_str); - attr_str += BuildNodeTypeAttr(RemoteFusedGraphExecuteInfo::GRAPH_OUTPUT, - tid.second, i); + attr_str += BuildNodeTypeAttr(GRAPH_OUTPUT, tid.second, i); } } for (const 
string& fused_node_name : fused_node_names) { if (fused_node_name == node_def.name()) { AppendDeliminator(&attr_str); - attr_str += BuildNodeTypeAttr(RemoteFusedGraphExecuteInfo::FUSED_NODE); + attr_str += BuildNodeTypeAttr(FUSED_NODE); } } for (const string& fused_node_name : fused_nodes_filtered_by_op_types) { if (fused_node_name == node_def.name()) { AppendDeliminator(&attr_str); - attr_str += BuildNodeTypeAttr(RemoteFusedGraphExecuteInfo::FUSED_NODE); + attr_str += BuildNodeTypeAttr(FUSED_NODE); } } for (size_t i = 0; i < border_inputs.size(); ++i) { if (IsSameNodeName(node_def, border_inputs.at(i), &tid)) { AppendDeliminator(&attr_str); - attr_str += BuildNodeTypeAttr(RemoteFusedGraphExecuteInfo::BORDER_INPUT, - tid.second, i); + attr_str += BuildNodeTypeAttr(BORDER_INPUT, tid.second, i); } } for (size_t i = 0; i < border_outputs.size(); ++i) { if (IsSameNodeName(node_def, border_outputs.at(i), &tid)) { AppendDeliminator(&attr_str); - attr_str += BuildNodeTypeAttr( - RemoteFusedGraphExecuteInfo::BORDER_OUTPUT, tid.second, i); + attr_str += BuildNodeTypeAttr(BORDER_OUTPUT, tid.second, i); } } if (attr_str.empty()) { - attr_str += BuildNodeTypeAttr(RemoteFusedGraphExecuteInfo::UNUSED); + attr_str += BuildNodeTypeAttr(UNUSED); } AddNodeAttr(ATTR_NODE_TYPE, attr_str, &node_def); } @@ -1200,14 +1199,14 @@ RemoteFusedGraphExecuteUtils::FuseRemoteGraphByPlacedArguments( } int node_type_int; CHECK(strings::safe_strto32(attr.at(0), &node_type_int)) << attr.at(0); - const RemoteFusedGraphExecuteInfo::NodeType node_type = - static_cast(node_type_int); + const RemoteFusedGraphNodeType node_type = + static_cast(node_type_int); const string& name = node_def.name(); int port; int index; switch (node_type) { - case RemoteFusedGraphExecuteInfo::GRAPH_INPUT: + case GRAPH_INPUT: VLOG(2) << "Graph input: " << name; CHECK_EQ(5, attr.size()); CHECK(strings::safe_strto32(attr.at(1), &port)); @@ -1224,33 +1223,33 @@ RemoteFusedGraphExecuteUtils::FuseRemoteGraphByPlacedArguments( 
return Status::OK(); } break; - case RemoteFusedGraphExecuteInfo::GRAPH_OUTPUT: + case GRAPH_OUTPUT: VLOG(2) << "Graph output: " << name; CHECK_EQ(3, attr.size()); CHECK(strings::safe_strto32(attr.at(1), &port)); CHECK(strings::safe_strto32(attr.at(2), &index)); output_map.emplace(index, strings::StrCat(name, ":", port)); break; - case RemoteFusedGraphExecuteInfo::FUSED_NODE: + case FUSED_NODE: VLOG(2) << "Fused node: " << name; CHECK_EQ(1, attr.size()); fused_node_names.emplace(name); break; - case RemoteFusedGraphExecuteInfo::BORDER_INPUT: + case BORDER_INPUT: VLOG(2) << "Border input: " << name; CHECK_EQ(3, attr.size()); CHECK(strings::safe_strto32(attr.at(1), &port)); CHECK(strings::safe_strto32(attr.at(2), &index)); border_input_map.emplace(index, strings::StrCat(name, ":", port)); break; - case RemoteFusedGraphExecuteInfo::BORDER_OUTPUT: + case BORDER_OUTPUT: VLOG(2) << "Border output: " << name; CHECK_EQ(3, attr.size()); CHECK(strings::safe_strto32(attr.at(1), &port)); CHECK(strings::safe_strto32(attr.at(2), &index)); border_output_map.emplace(index, strings::StrCat(name, ":", port)); break; - case RemoteFusedGraphExecuteInfo::UNUSED: + case UNUSED: // do nothing break; default: @@ -1461,20 +1460,19 @@ RemoteFusedGraphExecuteUtils::BuildNodeMapFromOpsDefinitions( } /* static */ string RemoteFusedGraphExecuteUtils::BuildNodeTypeAttr( - const RemoteFusedGraphExecuteInfo::NodeType node_type, const int port, - const int index, const string& executor_name, const string& node_name) { + const RemoteFusedGraphNodeType node_type, const int port, const int index, + const string& executor_name, const string& node_name) { return strings::StrCat(static_cast(node_type), ",", port, ",", index, ",", executor_name, ",", node_name); } /* static */ string RemoteFusedGraphExecuteUtils::BuildNodeTypeAttr( - const RemoteFusedGraphExecuteInfo::NodeType node_type, const int port, - const int index) { + const RemoteFusedGraphNodeType node_type, const int port, const int index) { 
return strings::StrCat(static_cast(node_type), ",", port, ",", index); } /* static */ string RemoteFusedGraphExecuteUtils::BuildNodeTypeAttr( - const RemoteFusedGraphExecuteInfo::NodeType node_type) { + const RemoteFusedGraphNodeType node_type) { return strings::StrCat(static_cast(node_type)); } diff --git a/tensorflow/core/kernels/remote_fused_graph_execute_utils.h b/tensorflow/core/kernels/remote_fused_graph_execute_utils.h index f047144278..ea6b6a1015 100644 --- a/tensorflow/core/kernels/remote_fused_graph_execute_utils.h +++ b/tensorflow/core/kernels/remote_fused_graph_execute_utils.h @@ -19,8 +19,6 @@ limitations under the License. #include #include -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/kernels/i_remote_fused_graph_executor.h" @@ -30,6 +28,17 @@ limitations under the License. namespace tensorflow { +enum RemoteFusedGraphNodeType { + UNUSED = 0, + GRAPH_INPUT = 1, + GRAPH_OUTPUT = 2, + FUSED_NODE = 3, + BORDER_INPUT = 4, + BORDER_OUTPUT = 5, +}; + +class RemoteFusedGraphExecuteInfo; + // RemoteFusedGraphExecuteUtils provides APIs to register and get builder // functions for IRemoteFusedGraphExecutor. 
class RemoteFusedGraphExecuteUtils { @@ -297,16 +306,15 @@ class RemoteFusedGraphExecuteUtils { static ExecutorBuildRegistry* GetExecutorBuildRegistry(); - static string BuildNodeTypeAttr( - const RemoteFusedGraphExecuteInfo::NodeType node_type, const int port, - const int index, const string& executor_name, const string& node_name); + static string BuildNodeTypeAttr(const RemoteFusedGraphNodeType node_type, + const int port, const int index, + const string& executor_name, + const string& node_name); - static string BuildNodeTypeAttr( - const RemoteFusedGraphExecuteInfo::NodeType node_type, const int port, - const int index); + static string BuildNodeTypeAttr(const RemoteFusedGraphNodeType node_type, + const int port, const int index); - static string BuildNodeTypeAttr( - const RemoteFusedGraphExecuteInfo::NodeType node_type); + static string BuildNodeTypeAttr(const RemoteFusedGraphNodeType node_type); TF_DISALLOW_COPY_AND_ASSIGN(RemoteFusedGraphExecuteUtils); }; diff --git a/tensorflow/core/kernels/remote_fused_graph_execute_utils_test.cc b/tensorflow/core/kernels/remote_fused_graph_execute_utils_test.cc index aca8ddfae9..44251e6ff8 100644 --- a/tensorflow/core/kernels/remote_fused_graph_execute_utils_test.cc +++ b/tensorflow/core/kernels/remote_fused_graph_execute_utils_test.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "tensorflow/cc/framework/scope.h" #include "tensorflow/core/common_runtime/shape_refiner.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h" #include "tensorflow/core/kernels/remote_fused_graph_execute_op_test_utils.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" diff --git a/tensorflow/core/kernels/remote_fused_graph_rewriter_transform_test.cc b/tensorflow/core/kernels/remote_fused_graph_rewriter_transform_test.cc index 9217c25978..1e0731e540 100644 --- a/tensorflow/core/kernels/remote_fused_graph_rewriter_transform_test.cc +++ b/tensorflow/core/kernels/remote_fused_graph_rewriter_transform_test.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/cc/ops/nn_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/graph/default_device.h" diff --git a/tensorflow/core/kernels/summary_interface.h b/tensorflow/core/kernels/summary_interface.h index 02391e967a..1854fe5526 100644 --- a/tensorflow/core/kernels/summary_interface.h +++ b/tensorflow/core/kernels/summary_interface.h @@ -17,14 +17,15 @@ limitations under the License. #include -#include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/event.pb.h" namespace tensorflow { +class Event; +class GraphDef; + // Main interface for the summary writer resource. 
class SummaryWriterInterface : public ResourceBase { public: diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc index d317a8d33d..b287f0cc2f 100644 --- a/tensorflow/core/kernels/summary_kernels.cc +++ b/tensorflow/core/kernels/summary_kernels.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/lib/db/sqlite.h" #include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/util/event.pb.h" namespace tensorflow { -- GitLab From 21bd19a8b8b0be8ac4d39b6bc32366ba908f5105 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 17:49:13 +0000 Subject: [PATCH 127/434] Change from squeeze_dims to axis when calling tf.squeeze The `squeeze_dims` in `tf.squeeze` has been deprecated in favor of `axis` while many places still use `squeeze_dims`. That generates lots of warnings. This fix switches from `squeeze_dims` to `axis` to remove those warnings. Signed-off-by: Yong Tang --- tensorflow/python/ops/array_grad.py | 2 +- tensorflow/python/ops/array_ops.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 57d2657838..3678bd4c1f 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -196,7 +196,7 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index): array_ops.where( math_ops.logical_and(grad.indices >= start, grad.indices < end)), - squeeze_dims=[1]) + axis=[1]) new_indices = array_ops.gather(grad.indices, indices_to_select) - start new_values = array_ops.gather(grad.values, indices_to_select) out_grads.append(ops.IndexedSlices(new_values, new_indices, size)) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 23202ae28e..bbffff0483 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1230,7 
+1230,7 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None): def _apply_mask_1d(reshaped_tensor, mask, axis=None): """Mask tensor along dimension 0 with a 1-D mask.""" - indices = squeeze(where(mask), squeeze_dims=[1]) + indices = squeeze(where(mask), axis=[1]) return gather(reshaped_tensor, indices, axis=axis) with ops.name_scope(name, values=[tensor, mask]): -- GitLab From 100b6000d4d04a344a1516578f724e46cdede5e1 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 17:52:31 +0000 Subject: [PATCH 128/434] Fix warning in image related ops. Signed-off-by: Yong Tang --- tensorflow/python/ops/image_ops_impl.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 601010bce9..bd5b2ae83b 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -652,7 +652,7 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height, padded.set_shape(padded_shape) if not is_batch: - padded = array_ops.squeeze(padded, squeeze_dims=[0]) + padded = array_ops.squeeze(padded, axis=[0]) return padded @@ -732,7 +732,7 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height, cropped.set_shape(cropped_shape) if not is_batch: - cropped = array_ops.squeeze(cropped, squeeze_dims=[0]) + cropped = array_ops.squeeze(cropped, axis=[0]) return cropped @@ -849,7 +849,7 @@ def resize_image_with_crop_or_pad(image, target_height, target_width): resized = control_flow_ops.with_dependencies(assert_ops, resized) if not is_batch: - resized = array_ops.squeeze(resized, squeeze_dims=[0]) + resized = array_ops.squeeze(resized, axis=[0]) return resized @@ -942,7 +942,7 @@ def resize_images(images, for x in [new_width_const, width, new_height_const, height]) and ( width == new_width_const and height == new_height_const): if not is_batch: - images = array_ops.squeeze(images, squeeze_dims=[0]) + images 
= array_ops.squeeze(images, axis=[0]) return images if method == ResizeMethod.BILINEAR: @@ -965,7 +965,7 @@ def resize_images(images, images.set_shape([None, new_height_const, new_width_const, None]) if not is_batch: - images = array_ops.squeeze(images, squeeze_dims=[0]) + images = array_ops.squeeze(images, axis=[0]) return images -- GitLab From 8cdc752227af998da946decc9365d63bcaa7f184 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 17:53:10 +0000 Subject: [PATCH 129/434] Fix warning in tf.nn ops where squeeze_dims was used with tf.squeeze Signed-off-by: Yong Tang --- tensorflow/python/ops/nn_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index d0d5ed07ce..576627e78e 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -765,9 +765,9 @@ def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False): weighted_variance = math_ops.multiply(weighted_distsq, divisor) if not keep_dims: - weighted_mean = array_ops.squeeze(weighted_mean, squeeze_dims=axes) + weighted_mean = array_ops.squeeze(weighted_mean, axis=axes) weighted_variance = array_ops.squeeze( - weighted_variance, squeeze_dims=axes) + weighted_variance, axis=axes) if needs_cast: weighted_mean = math_ops.cast(weighted_mean, dtypes.float16) -- GitLab From 12fd64f72f59ff5ba114903d4b851f855aaf2458 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 17:53:58 +0000 Subject: [PATCH 130/434] Fix warnings in reduce_join_op_test.py Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/reduce_join_op_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/reduce_join_op_test.py b/tensorflow/python/kernel_tests/reduce_join_op_test.py index 7f3049b9f8..fb9e5cc2a3 100644 --- a/tensorflow/python/kernel_tests/reduce_join_op_test.py +++ b/tensorflow/python/kernel_tests/reduce_join_op_test.py @@ -160,7 
+160,7 @@ class ReduceJoinTest(UnicodeTestCase): separator=separator) if not reduction_indices: truth = constant_op.constant(truth) - truth_squeezed = array_ops.squeeze(truth, squeeze_dims=reduction_indices) + truth_squeezed = array_ops.squeeze(truth, axis=reduction_indices) output_array = output.eval() output_keep_dims_array = output_keep_dims.eval() truth_array = truth.eval() -- GitLab From 9aa142284166c51dfc202b551b4592f9c9ed54e7 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 17:54:26 +0000 Subject: [PATCH 131/434] Fix tf.contrib.timeseries warnings related to squeeze_dims Signed-off-by: Yong Tang --- .../timeseries/python/timeseries/state_management_test.py | 2 +- .../python/timeseries/state_space_models/kalman_filter.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_management_test.py b/tensorflow/contrib/timeseries/python/timeseries/state_management_test.py index d5dce30fda..5f7e3da2db 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_management_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_management_test.py @@ -78,7 +78,7 @@ class StubTimeSeriesModel(model.TimeSeriesModel): batch_end_values = array_ops.squeeze( array_ops.slice(values, [0, array_ops.shape(times)[1] - 1, 0], [-1, 1, -1]), - squeeze_dims=[1, 2]) + axis=[1, 2]) # A pretty odd but easy to think about loss: L1 loss on the batch end # values. 
loss = math_ops.reduce_sum( diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/kalman_filter.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/kalman_filter.py index 1fcd3e391b..a614386121 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/kalman_filter.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/kalman_filter.py @@ -170,7 +170,7 @@ class KalmanFilter(object): math_ops.matmul( transition_matrices, prior_state[..., None]), - squeeze_dims=[-1]) + axis=[-1]) return advanced_state def predict_state_var( @@ -254,7 +254,7 @@ class KalmanFilter(object): kalman_gain_transposed, array_ops.expand_dims(residual, -1), adjoint_a=True), - squeeze_dims=[-1]) + axis=[-1]) gain_obs = math_ops.matmul( kalman_gain_transposed, observation_model, adjoint_a=True) identity_extradim = linalg_ops.eye( @@ -332,7 +332,7 @@ class KalmanFilter(object): array_ops.expand_dims(state_mean, 1), observation_model, adjoint_b=True), - squeeze_dims=[1]) + axis=[1]) observed_var = math_ops.matmul( math_ops.matmul(observation_model, state_var), observation_model, -- GitLab From 8257b9096062a87555d72f7c15e16b1d8e748d70 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 17:55:06 +0000 Subject: [PATCH 132/434] Fix warnings in tf.contrib.tensor_forest Signed-off-by: Yong Tang --- tensorflow/contrib/tensor_forest/client/eval_metrics.py | 4 ++-- .../tensor_forest/hybrid/python/layers/fully_connected.py | 2 +- tensorflow/contrib/tensor_forest/python/tensor_forest.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py index 90033015eb..e893e1d1c8 100644 --- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py +++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py @@ -37,7 +37,7 @@ def _top_k_generator(k): def _top_k(probabilities, targets): targets 
= math_ops.to_int32(targets) if targets.get_shape().ndims > 1: - targets = array_ops.squeeze(targets, squeeze_dims=[1]) + targets = array_ops.squeeze(targets, axis=[1]) return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k)) return _top_k @@ -57,7 +57,7 @@ def _r2(probabilities, targets, weights=None): def _squeeze_and_onehot(targets, depth): - targets = array_ops.squeeze(targets, squeeze_dims=[1]) + targets = array_ops.squeeze(targets, axis=[1]) return array_ops.one_hot(math_ops.to_int32(targets), depth) diff --git a/tensorflow/contrib/tensor_forest/hybrid/python/layers/fully_connected.py b/tensorflow/contrib/tensor_forest/hybrid/python/layers/fully_connected.py index ff3ab21eaa..745a5b1caf 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/python/layers/fully_connected.py +++ b/tensorflow/contrib/tensor_forest/hybrid/python/layers/fully_connected.py @@ -55,7 +55,7 @@ class ManyToOneLayer(hybrid_layer.HybridLayer): # There is always one activation per instance by definition, so squeeze # away the extra dimension. - return array_ops.squeeze(nn_activations, squeeze_dims=[1]) + return array_ops.squeeze(nn_activations, axis=[1]) class FlattenedFullyConnectedLayer(hybrid_layer.HybridLayer): diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py index b9bcbb170b..7a35a70bbe 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py @@ -445,7 +445,7 @@ class RandomForestGraphs(object): mask = math_ops.less( r, array_ops.ones_like(r) * self.params.bagging_fraction) gather_indices = array_ops.squeeze( - array_ops.where(mask), squeeze_dims=[1]) + array_ops.where(mask), axis=[1]) # TODO(thomaswc): Calculate out-of-bag data and labels, and store # them for use in calculating statistics later. 
tree_data = array_ops.gather(processed_dense_features, gather_indices) -- GitLab From 685fec394235b409b58d7ef1c4a26655f9fedcfd Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 17:55:35 +0000 Subject: [PATCH 133/434] Fix squeeze_dims warnings in tf.contrib.learn Signed-off-by: Yong Tang --- tensorflow/contrib/learn/python/learn/estimators/head.py | 4 ++-- tensorflow/contrib/learn/python/learn/ops/losses_ops.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 2b4b6eff39..e28e6854a5 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -777,7 +777,7 @@ class _RegressionHead(_SingleHead): key = prediction_key.PredictionKey.SCORES with ops.name_scope(None, "predictions", (logits,)): if self.logits_dimension == 1: - logits = array_ops.squeeze(logits, squeeze_dims=(1,), name=key) + logits = array_ops.squeeze(logits, axis=(1,), name=key) return {key: self._link_fn(logits)} def _metrics(self, eval_loss, predictions, labels, weights): @@ -974,7 +974,7 @@ def _softmax_cross_entropy_loss(labels, logits, weights=None): is_squeezed_labels = False # TODO(ptucker): This will break for dynamic shapes. 
if len(labels.get_shape()) == 2: - labels = array_ops.squeeze(labels, squeeze_dims=(1,)) + labels = array_ops.squeeze(labels, axis=(1,)) is_squeezed_labels = True loss = nn.sparse_softmax_cross_entropy_with_logits( diff --git a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py index 92976d1539..9f2cadb017 100644 --- a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py @@ -40,7 +40,7 @@ def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None): [tensor_in, labels]): predictions = nn.xw_plus_b(tensor_in, weights, biases) if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2: - predictions = array_ops_.squeeze(predictions, squeeze_dims=[1]) + predictions = array_ops_.squeeze(predictions, axis=[1]) return predictions, losses.mean_squared_error(labels, predictions) -- GitLab From 5c19fc7810f13712127b8527b040f8f656474fe5 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 17:56:09 +0000 Subject: [PATCH 134/434] Fix tf.contrib.layers warnings where squeeze_dims were used with tf.squeeze Signed-off-by: Yong Tang --- tensorflow/contrib/layers/python/layers/target_column.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/target_column.py b/tensorflow/contrib/layers/python/layers/target_column.py index 3e639a180e..69bb6be814 100644 --- a/tensorflow/contrib/layers/python/layers/target_column.py +++ b/tensorflow/contrib/layers/python/layers/target_column.py @@ -270,7 +270,7 @@ class _RegressionTargetColumn(_TargetColumn): def logits_to_predictions(self, logits, proba=False): if self.num_label_columns == 1: - return array_ops.squeeze(logits, squeeze_dims=[1]) + return array_ops.squeeze(logits, axis=[1]) return logits def get_eval_ops(self, features, logits, labels, metrics=None): @@ -418,7 +418,7 @@ def _softmax_cross_entropy_loss(logits, 
target): "Instead got %s." % target.dtype) # sparse_softmax_cross_entropy_with_logits requires [batch_size] target. if len(target.get_shape()) == 2: - target = array_ops.squeeze(target, squeeze_dims=[1]) + target = array_ops.squeeze(target, axis=[1]) loss_vec = nn.sparse_softmax_cross_entropy_with_logits( labels=target, logits=logits) return loss_vec -- GitLab From 50a8df144d24ce60866bff96645f04e84a31f8b4 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 17:57:06 +0000 Subject: [PATCH 135/434] Fix warnings in tf.contrib.factorization Signed-off-by: Yong Tang --- tensorflow/contrib/factorization/python/ops/gmm_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/factorization/python/ops/gmm_ops.py b/tensorflow/contrib/factorization/python/ops/gmm_ops.py index ccdd679d6a..e076631bc1 100644 --- a/tensorflow/contrib/factorization/python/ops/gmm_ops.py +++ b/tensorflow/contrib/factorization/python/ops/gmm_ops.py @@ -397,7 +397,7 @@ class GmmAlgorithm(object): # Compute the effective number of data points assigned to component k. with ops.control_dependencies(self._w): points_in_k = array_ops.squeeze( - math_ops.add_n(self._points_in_k), squeeze_dims=[0]) + math_ops.add_n(self._points_in_k), axis=[0]) # Update alpha. 
if 'w' in self._params: final_points_in_k = points_in_k / num_batches -- GitLab From 82eacbd4ac29db754b86a0be0cdfcc65b467c6af Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 22 Apr 2018 17:57:31 +0000 Subject: [PATCH 136/434] Fix warnings in tf.contrib.distributions with squeeze_dims Signed-off-by: Yong Tang --- .../python/ops/bijectors/cholesky_outer_product.py | 2 +- tensorflow/contrib/distributions/python/ops/shape.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py index caae2adcfa..ecdb8967f4 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py @@ -170,7 +170,7 @@ class CholeskyOuterProduct(bijector.Bijector): sum_weighted_log_diag = array_ops.squeeze( math_ops.matmul(math_ops.log(diag), exponents[..., array_ops.newaxis]), - squeeze_dims=-1) + axis=-1) fldj = p_float * np.log(2.) + sum_weighted_log_diag return fldj diff --git a/tensorflow/contrib/distributions/python/ops/shape.py b/tensorflow/contrib/distributions/python/ops/shape.py index bac0b79d59..6a7f28713a 100644 --- a/tensorflow/contrib/distributions/python/ops/shape.py +++ b/tensorflow/contrib/distributions/python/ops/shape.py @@ -439,7 +439,7 @@ class _DistributionShape(object): if self._batch_ndims_is_0 and expand_batch_dim: squeeze_dims += [1] if squeeze_dims: - x = array_ops.squeeze(x, squeeze_dims=squeeze_dims) + x = array_ops.squeeze(x, axis=squeeze_dims) # x.shape: [prod(S)]+B+E _, batch_shape, event_shape = self.get_shape(x) else: -- GitLab From ea0c8a7ed84eb5cdf8ca6a856f9bd05a95597739 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Sun, 22 Apr 2018 12:18:05 -0700 Subject: [PATCH 137/434] [StreamExecutor] [XLA] Delete copy/pasted implementations of MakeUnique. 
StreamExecutor and XLA had a copy/pasted implementation of MakeUnique, in namespaces stream_executor::port and xla. This change removes those implementations and instead pulls tensorflow::MakeUnique into namespace stream_executor and namespace xla. We pull it into stream_executor rather than stream_executor::port for consistency with TF and XLA, which both pull MakeUnique into their own namespace. This change also moves MakeUnique and WrapUnique out of namespace tensorflow::scam_ops::internal -- scam can simply use tensorflow::{Make,Wrap}Unique. I suspect the reason it was this way originally was that TF didn't have Make/WrapUnique. PiperOrigin-RevId: 193849330 --- tensorflow/compiler/xla/ptr_util.h | 22 +--------- .../xla/service/interpreter/platform.cc | 4 +- tensorflow/stream_executor/BUILD | 2 + .../stream_executor/cuda/cuda_platform.cc | 4 +- .../stream_executor/host/host_platform.cc | 4 +- tensorflow/stream_executor/lib/ptr_util.h | 42 ++----------------- 6 files changed, 13 insertions(+), 65 deletions(-) diff --git a/tensorflow/compiler/xla/ptr_util.h b/tensorflow/compiler/xla/ptr_util.h index c58c19db2c..bfcdfc62f9 100644 --- a/tensorflow/compiler/xla/ptr_util.h +++ b/tensorflow/compiler/xla/ptr_util.h @@ -28,26 +28,8 @@ limitations under the License. #include "tensorflow/core/util/ptr_util.h" namespace xla { - -template -std::unique_ptr WrapUnique(T* ptr) { - return tensorflow::WrapUnique(ptr); -} - -template -typename tensorflow::helper::MakeUniqueResult::scalar MakeUnique( - Args&&... args) { - return tensorflow::MakeUnique(std::forward(args)...); -} - -// Overload for array of unknown bound. -// The allocation of arrays needs to use the array form of new, -// and cannot take element constructor arguments. 
-template -typename tensorflow::helper::MakeUniqueResult::array MakeUnique(size_t n) { - return tensorflow::MakeUnique(n); -} - +using tensorflow::MakeUnique; +using tensorflow::WrapUnique; } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_PTR_UTIL_H_ diff --git a/tensorflow/compiler/xla/service/interpreter/platform.cc b/tensorflow/compiler/xla/service/interpreter/platform.cc index ce2f4d378c..92e069a8c6 100644 --- a/tensorflow/compiler/xla/service/interpreter/platform.cc +++ b/tensorflow/compiler/xla/service/interpreter/platform.cc @@ -71,8 +71,8 @@ port::StatusOr XlaInterpreterPlatform::GetExecutor( port::StatusOr> XlaInterpreterPlatform::GetUncachedExecutor( const StreamExecutorConfig& config) { - auto executor = port::MakeUnique( - this, port::MakeUnique(config.plugin_config)); + auto executor = MakeUnique( + this, MakeUnique(config.plugin_config)); auto init_status = executor->Init(config.ordinal, config.device_options); if (!init_status.ok()) { return port::Status{ diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD index 80fc9ff292..c68cda0100 100644 --- a/tensorflow/stream_executor/BUILD +++ b/tensorflow/stream_executor/BUILD @@ -35,6 +35,7 @@ cc_library( deps = [ "//tensorflow/compiler/xla:statusor", "//tensorflow/core:lib", + "//tensorflow/core:ptr_util", "@local_config_cuda//cuda:cuda_headers", ], alwayslink = 1, @@ -46,6 +47,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//tensorflow/core:lib", + "//tensorflow/core:ptr_util", "//tensorflow/compiler/xla:statusor", "@local_config_cuda//cuda:cuda_headers", ] + if_static([":stream_executor_impl"]), diff --git a/tensorflow/stream_executor/cuda/cuda_platform.cc b/tensorflow/stream_executor/cuda/cuda_platform.cc index 7a6ef5a248..649224a20e 100644 --- a/tensorflow/stream_executor/cuda/cuda_platform.cc +++ b/tensorflow/stream_executor/cuda/cuda_platform.cc @@ -168,8 +168,8 @@ port::StatusOr CudaPlatform::GetExecutor( port::StatusOr> 
CudaPlatform::GetUncachedExecutor(const StreamExecutorConfig& config) { - auto executor = port::MakeUnique( - this, port::MakeUnique(config.plugin_config)); + auto executor = MakeUnique( + this, MakeUnique(config.plugin_config)); auto init_status = executor->Init(config.ordinal, config.device_options); if (!init_status.ok()) { return port::Status{ diff --git a/tensorflow/stream_executor/host/host_platform.cc b/tensorflow/stream_executor/host/host_platform.cc index 00a17a05ed..a652b08b4f 100644 --- a/tensorflow/stream_executor/host/host_platform.cc +++ b/tensorflow/stream_executor/host/host_platform.cc @@ -66,8 +66,8 @@ port::StatusOr HostPlatform::GetExecutor( port::StatusOr> HostPlatform::GetUncachedExecutor(const StreamExecutorConfig& config) { - auto executor = port::MakeUnique( - this, port::MakeUnique(config.plugin_config)); + auto executor = MakeUnique( + this, MakeUnique(config.plugin_config)); auto init_status = executor->Init(config.ordinal, config.device_options); if (!init_status.ok()) { return port::Status{ diff --git a/tensorflow/stream_executor/lib/ptr_util.h b/tensorflow/stream_executor/lib/ptr_util.h index 3f89794688..8f9f420fec 100644 --- a/tensorflow/stream_executor/lib/ptr_util.h +++ b/tensorflow/stream_executor/lib/ptr_util.h @@ -17,47 +17,11 @@ limitations under the License. #define TENSORFLOW_STREAM_EXECUTOR_LIB_PTR_UTIL_H_ #include +#include "tensorflow/core/util/ptr_util.h" namespace stream_executor { -namespace port { - -// Trait to select overloads and return types for MakeUnique. -template -struct MakeUniqueResult { - using scalar = std::unique_ptr; -}; -template -struct MakeUniqueResult { - using array = std::unique_ptr; -}; -template -struct MakeUniqueResult { - using invalid = void; -}; - -// MakeUnique(...) is an early implementation of C++14 std::make_unique. -// It is designed to be 100% compatible with std::make_unique so that the -// eventual switchover will be a simple renaming operation. 
-template -typename MakeUniqueResult::scalar MakeUnique(Args&&... args) { // NOLINT - return std::unique_ptr( - new T(std::forward(args)...)); // NOLINT(build/c++11) -} - -// Overload for array of unknown bound. -// The allocation of arrays needs to use the array form of new, -// and cannot take element constructor arguments. -template -typename MakeUniqueResult::array MakeUnique(size_t n) { - return std::unique_ptr(new typename std::remove_extent::type[n]()); -} - -// Reject arrays of known bound. -template -typename MakeUniqueResult::invalid MakeUnique(Args&&... /* args */) = - delete; // NOLINT - -} // namespace port +using tensorflow::MakeUnique; +using tensorflow::WrapUnique; } // namespace stream_executor namespace perftools { -- GitLab From 56fd856425f1322d22796decb1f0580c8fab5d5a Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Sun, 22 Apr 2018 14:48:05 -0700 Subject: [PATCH 138/434] [XLA] Make Executable return a ScopedShapedBuffer. Previously, we returned a plain ShapedBuffer. But this doesn't capture our semantics: It's up to the callee to free this ShapedBuffer. 
PiperOrigin-RevId: 193854051 --- .../compiler/xla/client/local_client.cc | 12 ++--- .../xla/service/allocation_tracker.cc | 45 ++++++++++++------- .../compiler/xla/service/allocation_tracker.h | 32 ++++++++----- .../xla/service/cpu/cpu_executable.cc | 14 +++--- .../compiler/xla/service/cpu/cpu_executable.h | 8 ++-- .../service/cpu/parallel_cpu_executable.cc | 10 ++--- .../xla/service/cpu/parallel_cpu_executable.h | 4 +- tensorflow/compiler/xla/service/executable.cc | 8 ++-- tensorflow/compiler/xla/service/executable.h | 8 ++-- .../xla/service/gpu/gpu_executable.cc | 8 ++-- .../compiler/xla/service/gpu/gpu_executable.h | 4 +- tensorflow/compiler/xla/service/hlo_runner.cc | 14 ++---- .../xla/service/interpreter/executable.cc | 8 ++-- .../xla/service/interpreter/executable.h | 4 +- tensorflow/compiler/xla/service/service.cc | 14 +++--- .../compiler/xla/service/shaped_buffer.cc | 4 +- .../compiler/xla/service/shaped_buffer.h | 6 +++ .../compiler/xla/service/transfer_manager.cc | 15 ++----- .../compiler/xla/service/transfer_manager.h | 5 +-- tensorflow/compiler/xla/tests/fusion_test.cc | 6 +-- 20 files changed, 119 insertions(+), 110 deletions(-) diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index d0e945b70f..1c12705903 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -166,12 +166,8 @@ StatusOr LocalExecutable::Run( if (executable_->dumping()) { return ExecuteAndDump(&service_options, arguments); } - TF_ASSIGN_OR_RETURN( - ShapedBuffer result, - executable_->ExecuteOnStreamWrapper( - &service_options, run_options.execution_profile(), arguments)); - - return ScopedShapedBuffer(std::move(result), run_options.allocator()); + return executable_->ExecuteOnStreamWrapper( + &service_options, run_options.execution_profile(), arguments); } StatusOr LocalExecutable::ExecuteAndDump( @@ -181,12 +177,12 @@ StatusOr LocalExecutable::ExecuteAndDump( 
backend_->platform()->Name()); TF_RETURN_IF_ERROR(RecordArguments(arguments, executable_->session_module())); TF_ASSIGN_OR_RETURN( - ShapedBuffer result, + ScopedShapedBuffer result, executable_->ExecuteOnStream(run_options, arguments, /*hlo_execution_profile=*/nullptr)); TF_RETURN_IF_ERROR(RecordResult(&result, executable_->session_module())); TF_RETURN_IF_ERROR(executable_->DumpSessionModule()); - return ScopedShapedBuffer(std::move(result), run_options->allocator()); + return std::move(result); } tensorflow::Status LocalExecutable::RecordArguments( diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc index 6bf65825cd..cf1231bcce 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.cc +++ b/tensorflow/compiler/xla/service/allocation_tracker.cc @@ -31,23 +31,35 @@ limitations under the License. namespace xla { StatusOr AllocationTracker::Register( - ShapedBuffer shaped_buffer, const string& tag) { + ScopedShapedBuffer shaped_buffer, const string& tag) { tensorflow::mutex_lock lock(mutex_); VLOG(2) << "Register"; - std::vector replicated_buffers; + std::vector replicated_buffers; replicated_buffers.emplace_back(std::move(shaped_buffer)); return RegisterInternal(std::move(replicated_buffers), tag); } StatusOr AllocationTracker::RegisterReplicatedBuffers( - std::vector replicated_buffers, const string& tag) { + std::vector replicated_buffers, const string& tag) { tensorflow::mutex_lock lock(mutex_); VLOG(2) << "RegisterReplicatedBuffers"; return RegisterInternal(std::move(replicated_buffers), tag); } +// ReleaseIfScopedShapedBuffer lets RegisterInternal(b) call +// b.release() if b is a ScopedShapedBuffer, or otherwise pass b through +// unmodified. 
+static ShapedBuffer ReleaseIfScopedShapedBuffer(ShapedBuffer b) { return b; } +static ShapedBuffer ReleaseIfScopedShapedBuffer(ScopedShapedBuffer b) { + return b.release(); +} + +template StatusOr AllocationTracker::RegisterInternal( - std::vector replicated_buffers, const string& tag) { + std::vector replicated_buffers, const string& tag) { + static_assert(std::is_same::value || + std::is_same::value, + "ShapedBufferTy must be ShapedBuffer or ScopedShapedBuffer."); VLOG(2) << "RegisterInternal(" << "tag: \"" << tag << "\" with " << replicated_buffers.size() << " shaped_buffers."; @@ -65,17 +77,22 @@ StatusOr AllocationTracker::RegisterInternal( int64 handle = next_handle_++; for (auto& shaped_buffer : replicated_buffers) { std::vector shape_indices; - ShapeUtil::ForEachSubshape(shaped_buffer.on_device_shape(), - [this, &shape_indices](const Shape& /*subshape*/, - const ShapeIndex& index) { - shape_indices.push_back(index); - }); + ShapeUtil::ForEachSubshape( + shaped_buffer.on_device_shape(), + [&](const Shape& /*subshape*/, const ShapeIndex& index) { + shape_indices.push_back(index); + }); + // Add shaped_buffer's buffers to opaque_to_allocation_map_, which owns + // them. for (const ShapeIndex& index : shape_indices) { AddAllocationOrIncrementRefCount(shaped_buffer.buffer(index), shaped_buffer.device_ordinal()); } - handle_to_shaped_buffers_[handle].emplace_back( - MakeUnique(std::move(shaped_buffer))); + // If ShapedBufferTy is ScopedShapedBuffer, release the ScopedShapedBuffer + // into a regular ShapedBuffer, which is stored in + // handle_to_shaped_buffers_. 
+ handle_to_shaped_buffers_[handle].emplace_back(MakeUnique( + ReleaseIfScopedShapedBuffer(std::move(shaped_buffer)))); } GlobalDataHandle result; @@ -102,10 +119,6 @@ tensorflow::Status AllocationTracker::Unregister(const GlobalDataHandle& data) { shaped_buffer->device_ordinal())); } } - return Reset(data); -} - -Status AllocationTracker::Reset(const GlobalDataHandle& data) { // Keep a nullptr as a tombstone for unregistered handles. This enables // better error messages. That is, "handle has been deallocated" versus // "handle does not exist". @@ -152,7 +165,7 @@ StatusOr> AllocationTracker::DeconstructTuple( element_buffer.set_buffer(shaped_buffer->buffer(/*index=*/{i}), /*index=*/{}); std::vector replicated_buffers; - replicated_buffers.emplace_back(std::move(element_buffer)); + replicated_buffers.push_back(std::move(element_buffer)); TF_ASSIGN_OR_RETURN( GlobalDataHandle element_handle, RegisterInternal(std::move(replicated_buffers), "deconstructed tuple")); diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h index 2bfcd53712..1174fa641c 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.h +++ b/tensorflow/compiler/xla/service/allocation_tracker.h @@ -45,13 +45,13 @@ class AllocationTracker { // Registers a shaped buffer of device memory, and returns a corresponding // handle that can be used for talking to XLA clients. The given shaped buffer // will be treated as the buffer corresponding to the only replica. - StatusOr Register(ShapedBuffer shaped_buffer, + StatusOr Register(ScopedShapedBuffer shaped_buffer, const string& tag); // Registers a vector of shaped buffers of device memory, one per replica, and // returns a corresponding handle that can be used for talking to XLA clients. 
StatusOr RegisterReplicatedBuffers( - std::vector replicated_buffers, const string& tag); + std::vector replicated_buffers, const string& tag); // Unregister the allocation for the given data handle. Status Unregister(const GlobalDataHandle& data); @@ -87,21 +87,21 @@ class AllocationTracker { }; // Internal helper which resolves the given GlobalDataHandle to a - // ShapedBuffer. + // list of ScopedShapedBuffers. StatusOr> ResolveInternal( const GlobalDataHandle& data) EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Internal helper which registers a vector of shaped buffers, one per - // replica. + // replica. ShapedBufferTy is either ScopedShapedBuffer or ShapedBuffer. If + // it's ShapedBuffer, all of the given buffers must already be tracked by this + // object -- presumably this is a call from DeconstructTuple. + template StatusOr RegisterInternal( - std::vector replicated_buffers, const string& tag) + std::vector replicated_buffers, const string& tag) EXCLUSIVE_LOCKS_REQUIRED(mutex_); - // Resets the shaped buffers corresponding to the given handle. - Status Reset(const GlobalDataHandle& data) EXCLUSIVE_LOCKS_REQUIRED(mutex_); - // Adds the given device address to the allocation tracker, or if it already - // exists, then increment it's reference count. + // exists, then increment its reference count. void AddAllocationOrIncrementRefCount(se::DeviceMemoryBase device_memory, int device_ordinal) EXCLUSIVE_LOCKS_REQUIRED(mutex_); @@ -133,7 +133,19 @@ class AllocationTracker { // buffers for different replicas. // // The ShapedBuffers in this map's vectors need to be unique_ptrs, because our - // public API returns pointers to them. + // public API returns pointers to them. We expect the concrete class to be + // ShapedBuffer and never ScopedShapedBuffer; deallocation of buffers is + // handled by opaque_to_allocation_map_. + // + // The elements of the vectors need to be unique_ptrs because we return + // pointers to them. 
(In theory we could use std::list or something instead, + // but we also want to be able to null out these elements.) + // + // The reason that the elements can't be unique_ptrs is + // the existence of DeconstructTuple(). This function allows us to create a + // non-owning "view" into a tuple's sub-buffers. The sub-buffers are then + // free'd when both the view *and* the original tuple are Unregistered. This + // refcounting is managed in opaque_to_allocation_map_. tensorflow::gtl::FlatMap>> handle_to_shaped_buffers_ GUARDED_BY(mutex_); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index 97e550abe4..aabf4d5161 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -243,14 +243,14 @@ static Status DeallocateTempBuffers( return Status::OK(); } -StatusOr CpuExecutable::CreateResultShapedBuffer( +StatusOr CpuExecutable::CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice allocated_buffers, std::vector* buffers_in_result) { se::Stream* stream = run_options->stream(); - ShapedBuffer result_buffer( + ScopedShapedBuffer result_buffer( /*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(), - stream->parent()->platform(), stream->parent()->device_ordinal()); + run_options->allocator(), stream->parent()->device_ordinal()); // Copy DeviceMemoryBase values which contain the array(s) of the result into // the respective location in ShapedBuffer which is returned to the caller. 
@@ -281,7 +281,7 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( return std::move(result_buffer); } -StatusOr CpuExecutable::ExecuteOnStream( +StatusOr CpuExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) { @@ -300,7 +300,7 @@ StatusOr CpuExecutable::ExecuteOnStream( std::vector buffers_in_result(assignment_->Allocations().size(), false); TF_ASSIGN_OR_RETURN( - ShapedBuffer result_buffer, + ScopedShapedBuffer result_buffer, CreateResultShapedBuffer(run_options, buffers, &buffers_in_result)); // Free all buffers not in the result. @@ -310,7 +310,7 @@ StatusOr CpuExecutable::ExecuteOnStream( return std::move(result_buffer); } -StatusOr CpuExecutable::ExecuteAsyncOnStream( +StatusOr CpuExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) { if (hlo_profiling_enabled()) { @@ -330,7 +330,7 @@ StatusOr CpuExecutable::ExecuteAsyncOnStream( std::vector buffers_in_result(assignment_->Allocations().size(), false); TF_ASSIGN_OR_RETURN( - ShapedBuffer result_buffer, + ScopedShapedBuffer result_buffer, CreateResultShapedBuffer(run_options, buffers, &buffers_in_result)); LogLiveAddresses(buffers, buffers_in_result); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h index 06b6943cb5..68ad38cba8 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h @@ -55,12 +55,12 @@ class CpuExecutable : public Executable { std::unique_ptr hlo_profile_index_map); ~CpuExecutable() override {} - StatusOr ExecuteOnStream( + StatusOr ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) override; - StatusOr ExecuteAsyncOnStream( + StatusOr ExecuteAsyncOnStream( const 
ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) override; @@ -102,13 +102,13 @@ class CpuExecutable : public Executable { tensorflow::gtl::ArraySlice buffers, HloExecutionProfile* hlo_execution_profile); - // Creates a ShapedBuffer for holding the result of the computation. The + // Creates a ScopedShapedBuffer for holding the result of the computation. The // addresses (DeviceMemoryBases) are set according to buffer assignment. // 'buffers_in_result' should point to a vector of the same size as // 'allocated_buffers'. An element in buffers_in_result is set to true if the // corresponding buffer is live out of the computation (and thus contained in // the returned ShapedBuffer). - StatusOr CreateResultShapedBuffer( + StatusOr CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice allocated_buffers, std::vector* buffers_in_result); diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc index a2bd4fa195..035f9ddb2e 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc @@ -447,7 +447,7 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions( return Status::OK(); } -StatusOr ParallelCpuExecutable::ExecuteOnStream( +StatusOr ParallelCpuExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) { @@ -459,9 +459,9 @@ StatusOr ParallelCpuExecutable::ExecuteOnStream( DeviceMemoryAllocator* memory_allocator = run_options->allocator(); std::vector buffers(assignment_->Allocations().size()); - ShapedBuffer result_buffer( + ScopedShapedBuffer result_buffer( /*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(), - stream->parent()->platform(), stream->parent()->device_ordinal()); + run_options->allocator(), 
stream->parent()->device_ordinal()); TF_RETURN_IF_ERROR(AllocateBuffers( memory_allocator, stream->parent()->device_ordinal(), &buffers)); @@ -470,7 +470,7 @@ StatusOr ParallelCpuExecutable::ExecuteOnStream( hlo_execution_profile)); // Copy DeviceMemoryBase values which into the respective location in - // ShapedBuffer which is returned to the caller. + // the ScopedShapedBuffer which is returned to the caller. std::vector buffers_in_result(assignment_->Allocations().size(), false); TF_RETURN_IF_ERROR(result_buffer.buffers().ForEachMutableElementWithStatus( [&](const ShapeIndex& index, se::DeviceMemoryBase* device_memory) { @@ -511,7 +511,7 @@ StatusOr ParallelCpuExecutable::ExecuteOnStream( return std::move(result_buffer); } -StatusOr ParallelCpuExecutable::ExecuteAsyncOnStream( +StatusOr ParallelCpuExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) { // TODO(b/30671675): Implement asynchronous execution mode. diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h index 5ce84fa996..55f8331b59 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h @@ -59,12 +59,12 @@ class ParallelCpuExecutable : public Executable { std::unique_ptr hlo_profile_index_map); ~ParallelCpuExecutable() override {} - StatusOr ExecuteOnStream( + StatusOr ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) override; - StatusOr ExecuteAsyncOnStream( + StatusOr ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) override; diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc index be19b3ff04..021f09d310 100644 --- 
a/tensorflow/compiler/xla/service/executable.cc +++ b/tensorflow/compiler/xla/service/executable.cc @@ -29,12 +29,12 @@ using tensorflow::gtl::ArraySlice; namespace xla { -StatusOr> Executable::ExecuteOnStreams( +StatusOr> Executable::ExecuteOnStreams( ArraySlice run_options, ArraySlice> arguments) { TF_RET_CHECK(run_options.size() == arguments.size()); - std::vector return_values; + std::vector return_values; return_values.reserve(run_options.size()); if (run_options.size() == 1) { @@ -60,7 +60,7 @@ StatusOr> Executable::ExecuteOnStreams( return std::move(return_values); } -StatusOr Executable::ExecuteOnStreamWrapper( +StatusOr Executable::ExecuteOnStreamWrapper( const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile, ArraySlice arguments) { se::Stream* stream = run_options->stream(); @@ -80,7 +80,7 @@ StatusOr Executable::ExecuteOnStreamWrapper( &hlo_profile_index_map()) : nullptr; - StatusOr return_value = + StatusOr return_value = ExecuteOnStream(run_options, arguments, profile_ptr.get()); TF_RETURN_IF_ERROR(return_value.status()); diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index 0c95f1a361..f7af1ca574 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -63,14 +63,14 @@ class Executable { // enabled. // // Returns a shaped buffer containing the result of the computation. - virtual StatusOr ExecuteOnStream( + virtual StatusOr ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) = 0; // Same as ExecuteOnStream(), but this call is non-blocking and returns as // soon as all of the operations are enqueued for launch on the stream. 
- virtual StatusOr ExecuteAsyncOnStream( + virtual StatusOr ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) = 0; @@ -78,7 +78,7 @@ class Executable { // streams. arguments[i] contains the arguments to the execution on // run_options[i]->stream() and the returned value is at index i of the // returned vector. - virtual StatusOr> ExecuteOnStreams( + virtual StatusOr> ExecuteOnStreams( tensorflow::gtl::ArraySlice run_options, tensorflow::gtl::ArraySlice< @@ -98,7 +98,7 @@ class Executable { // Convenience wrapper for calling Executable::ExecuteOnStream. Sets up a // timer for the execution, sets up HLO profiling if enabled, and fills in the // given ExecutionProfile if non-null. - StatusOr ExecuteOnStreamWrapper( + StatusOr ExecuteOnStreamWrapper( const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile, tensorflow::gtl::ArraySlice arguments); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 62ce15bc59..980cc89fa0 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -250,7 +250,7 @@ Status GpuExecutable::ExecuteThunks( return Status::OK(); } -StatusOr GpuExecutable::ExecuteOnStream( +StatusOr GpuExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) { @@ -297,8 +297,8 @@ StatusOr GpuExecutable::ExecuteOnStream( HloInstruction* root = hlo_module_->entry_computation()->root_instruction(); auto device_ordinal = executor->device_ordinal(); - auto shaped_buffer = ShapedBuffer(root->shape(), root->shape(), - executor->platform(), device_ordinal); + ScopedShapedBuffer shaped_buffer(root->shape(), root->shape(), + memory_allocator, device_ordinal); // Copy DeviceMemoryBase values which contain the array(s) of the result into // the respective 
location in ShapedBuffer. @@ -335,7 +335,7 @@ StatusOr GpuExecutable::ExecuteOnStream( return std::move(shaped_buffer); } -StatusOr GpuExecutable::ExecuteAsyncOnStream( +StatusOr GpuExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) { // TODO(b/30671675): Implement asynchronous execution mode. diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h index 361bc30b2f..80ec38c3ac 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h @@ -74,12 +74,12 @@ class GpuExecutable : public Executable { // ExecuteOnStream will fail if the compute capability of the stream doesn't // match the compute capability passed to this object's constructor. - StatusOr ExecuteOnStream( + StatusOr ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) override; - StatusOr ExecuteAsyncOnStream( + StatusOr ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) override; diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index df5ffd0b7d..81c43db292 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -126,16 +126,12 @@ StatusOr> HloRunner::Execute( } TF_ASSIGN_OR_RETURN( - ShapedBuffer result, + ScopedShapedBuffer result, executable->ExecuteOnStreamWrapper( &service_run_options, /*profile=*/nullptr, argument_buffer_ptrs)); - // Create a ScopedShapedBuffer of the result to manage deallocation. This will - // deallocate all the device memory when it goes out of scope. 
- ScopedShapedBuffer scoped_result(std::move(result), run_options.allocator()); - auto result_literal = backend().transfer_manager()->TransferLiteralFromDevice( - stream.parent(), scoped_result); + stream.parent(), result); if (result_literal.ok()) { VLOG(4) << "Executed binary and got result: " << result_literal.ValueOrDie()->ToString(); @@ -248,18 +244,16 @@ StatusOr>> HloRunner::ExecuteReplicated( } LOG(INFO) << "Replicated execution started"; - TF_ASSIGN_OR_RETURN(std::vector results, + TF_ASSIGN_OR_RETURN(std::vector results, executable->ExecuteOnStreams(service_run_options, argument_buffer_slices)); LOG(INFO) << "Replicated execution terminated"; std::vector> exec_results; for (int64 i = 0; i < options.num_replicas; ++i) { - ScopedShapedBuffer result(std::move(results[i]), - backend().memory_allocator()); TF_ASSIGN_OR_RETURN(std::unique_ptr literal, backend().transfer_manager()->TransferLiteralFromDevice( - streams[i]->parent(), result)); + streams[i]->parent(), results[i])); exec_results.push_back(std::move(literal)); } return std::move(exec_results); diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc index 6553000336..61f199bc9e 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable.cc @@ -45,7 +45,7 @@ InterpreterExecutable::InterpreterExecutable( InterpreterExecutable::~InterpreterExecutable() {} -StatusOr InterpreterExecutable::ExecuteOnStream( +StatusOr InterpreterExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) { @@ -88,8 +88,8 @@ StatusOr InterpreterExecutable::ExecuteOnStream( evaluator.Evaluate>(*computation, arg_literals)); // Transform the result literal back into a ShapedBuffer. 
- TF_ASSIGN_OR_RETURN(ShapedBuffer result, - transfer_manager->AllocateShapedBuffer( + TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result, + transfer_manager->AllocateScopedShapedBuffer( result_literal->shape(), run_options->allocator(), executor->device_ordinal())); TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDevice( @@ -106,7 +106,7 @@ StatusOr InterpreterExecutable::ExecuteOnStream( return std::move(result); } -StatusOr InterpreterExecutable::ExecuteAsyncOnStream( +StatusOr InterpreterExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) { return tensorflow::errors::Unimplemented( diff --git a/tensorflow/compiler/xla/service/interpreter/executable.h b/tensorflow/compiler/xla/service/interpreter/executable.h index c825a9a368..b0b797ca7d 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.h +++ b/tensorflow/compiler/xla/service/interpreter/executable.h @@ -43,12 +43,12 @@ class InterpreterExecutable : public Executable { InterpreterExecutable(std::unique_ptr hlo_module); ~InterpreterExecutable() override; - StatusOr ExecuteOnStream( + StatusOr ExecuteOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments, HloExecutionProfile* hlo_execution_profile) override; - StatusOr ExecuteAsyncOnStream( + StatusOr ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) override; diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index a73118c68a..e8403c9e95 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -553,7 +553,7 @@ Service::ExecuteParallelAndRegisterResult( // Stream executors for the replicas of the current computation. 
TF_ASSIGN_OR_RETURN(auto replicas, Replicas(*backend, device_handles[i])); CHECK_EQ(replicas.size(), arguments[i].size()); - std::vector result_buffers; + std::vector result_buffers; for (int64 replica = 0; replica < replicas.size(); ++replica) { TF_ASSIGN_OR_RETURN(Pool::SmartPtr stream, backend->BorrowStream(replicas[replica])); @@ -585,7 +585,7 @@ Service::ExecuteParallelAndRegisterResult( backend->StreamBorrower()); // Asynchronously launch the computation. - TF_ASSIGN_OR_RETURN(ShapedBuffer result, + TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result, executables[i]->ExecuteAsyncOnStream( &run_options, arguments[i][replica])); @@ -1237,7 +1237,7 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, streams.push_back(std::move(stream)); } - std::vector result_buffers; + std::vector result_buffers; for (size_t i = 0; i < streams.size(); ++i) { const auto& stream = streams[i]; ExecutableRunOptions options; @@ -1250,7 +1250,7 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, ServiceExecutableRunOptions service_options( options, execute_backend_->StreamBorrower()); - TF_ASSIGN_OR_RETURN(ShapedBuffer this_result_buffer, + TF_ASSIGN_OR_RETURN(ScopedShapedBuffer this_result_buffer, executable->ExecuteAsyncOnStream( &service_options, replicated_arguments[i])); @@ -1350,11 +1350,11 @@ tensorflow::Status Service::TransferToServer(const TransferToServerRequest* arg, } // Allocate memory in each replica and transfer the data to all replicas. 
- std::vector replicated_buffers; + std::vector replicated_buffers; for (se::StreamExecutor* executor : replicas) { TF_ASSIGN_OR_RETURN( - ShapedBuffer shaped_buffer, - execute_backend_->transfer_manager()->AllocateShapedBuffer( + ScopedShapedBuffer shaped_buffer, + execute_backend_->transfer_manager()->AllocateScopedShapedBuffer( shape, execute_backend_->memory_allocator(), executor->device_ordinal())); TF_RETURN_IF_ERROR( diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc index 0b5a383f6f..fb3b5f06da 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.cc +++ b/tensorflow/compiler/xla/service/shaped_buffer.cc @@ -117,7 +117,7 @@ ScopedShapedBuffer::ScopedShapedBuffer(ShapedBuffer shaped_buffer, : ShapedBuffer(std::move(shaped_buffer)), allocator_(allocator) {} ScopedShapedBuffer::ScopedShapedBuffer(ScopedShapedBuffer&& s) - : ShapedBuffer(std::move(s)), allocator_(s.allocator_) { + : ShapedBuffer(static_cast(s)), allocator_(s.allocator_) { // Null out s.allocator_ so it doesn't try to free anything in its destructor. s.allocator_ = nullptr; } @@ -151,7 +151,7 @@ ScopedShapedBuffer::~ScopedShapedBuffer() { } ShapedBuffer ScopedShapedBuffer::release() { - ShapedBuffer shaped_buffer(std::move(*this)); + ShapedBuffer shaped_buffer(static_cast(*this)); buffers_ = ShapeTree(); return shaped_buffer; } diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h index f1b0527474..e10fca9e94 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.h +++ b/tensorflow/compiler/xla/service/shaped_buffer.h @@ -30,6 +30,8 @@ limitations under the License. namespace xla { +class ScopedShapedBuffer; + // Class which encapsulates a buffer or set of buffers containing data of a // particular XLA shape. 
class ShapedBuffer { @@ -49,6 +51,10 @@ class ShapedBuffer { ShapedBuffer(const ShapedBuffer&) = delete; ShapedBuffer& operator=(const ShapedBuffer&) = delete; + // Prevent (some forms of) accidental object slicing. + ShapedBuffer(const ScopedShapedBuffer&) = delete; + ShapedBuffer& operator=(const ScopedShapedBuffer&) = delete; + virtual ~ShapedBuffer(); // Returns the shape of the on-host representation of the data held by this diff --git a/tensorflow/compiler/xla/service/transfer_manager.cc b/tensorflow/compiler/xla/service/transfer_manager.cc index 98d0111d04..8b71a41509 100644 --- a/tensorflow/compiler/xla/service/transfer_manager.cc +++ b/tensorflow/compiler/xla/service/transfer_manager.cc @@ -175,7 +175,7 @@ Status TransferManager::TransferBufferToDevice( return Status::OK(); } -StatusOr TransferManager::AllocateShapedBuffer( +StatusOr TransferManager::AllocateScopedShapedBuffer( const Shape& on_host_shape, DeviceMemoryAllocator* allocator, int device_ordinal) { if (!LayoutUtil::HasLayout(on_host_shape)) { @@ -187,8 +187,8 @@ StatusOr TransferManager::AllocateShapedBuffer( const Shape on_device_shape = HostShapeToDeviceShape(on_host_shape); TF_RET_CHECK(LayoutUtil::HasLayout(on_device_shape)); - ShapedBuffer shaped_buffer(on_host_shape, on_device_shape, - allocator->platform(), device_ordinal); + ScopedShapedBuffer shaped_buffer(on_host_shape, on_device_shape, allocator, + device_ordinal); // Allocate an appropriate sized buffer for each element in the shape // including the tuple pointer arrays. 
@@ -204,13 +204,4 @@ StatusOr TransferManager::AllocateShapedBuffer( return std::move(shaped_buffer); } -StatusOr TransferManager::AllocateScopedShapedBuffer( - const Shape& on_host_shape, DeviceMemoryAllocator* allocator, - int device_ordinal) { - TF_ASSIGN_OR_RETURN( - ShapedBuffer unscoped_buffer, - AllocateShapedBuffer(on_host_shape, allocator, device_ordinal)); - return ScopedShapedBuffer(std::move(unscoped_buffer), allocator); -} - } // namespace xla diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h index a6451c4bb1..d82b4f0f81 100644 --- a/tensorflow/compiler/xla/service/transfer_manager.h +++ b/tensorflow/compiler/xla/service/transfer_manager.h @@ -104,12 +104,9 @@ class TransferManager { // region for a host-to-device transfer. virtual int64 GetByteSizeRequirement(const Shape& shape) const = 0; - // Allocate a ShapedBuffer which can hold data with the given on-host + // Allocates a ScopedShapedBuffer which can hold data with the given on-host // shape. The on-device shape may be different as indicated by // HostShapeToDeviceShape. - StatusOr AllocateShapedBuffer(const Shape& on_host_shape, - DeviceMemoryAllocator* allocator, - int device_ordinal); StatusOr AllocateScopedShapedBuffer( const Shape& on_host_shape, DeviceMemoryAllocator* allocator, int device_ordinal); diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc index c7f64d8560..6f89e9164c 100644 --- a/tensorflow/compiler/xla/tests/fusion_test.cc +++ b/tensorflow/compiler/xla/tests/fusion_test.cc @@ -794,19 +794,19 @@ void BM_ParallelFusion(int num_iters) { // Transfer literals to device. 
auto param0_literal = Literal::CreateR2F32Linspace(1.0, 2.0, param0_dim0, param0_dim1); - ShapedBuffer buffer0 = + ScopedShapedBuffer buffer0 = client->LiteralToShapedBuffer(*param0_literal, device_ordinal) .ConsumeValueOrDie(); auto param1_literal = Literal::CreateR2F32Linspace(1.0, 2.0, param1_dim0, param1_dim1); - ShapedBuffer buffer1 = + ScopedShapedBuffer buffer1 = client->LiteralToShapedBuffer(*param1_literal, device_ordinal) .ConsumeValueOrDie(); auto param2_literal = Literal::CreateR2F32Linspace(1.0, 2.0, param2_dim0, param2_dim1); - ShapedBuffer buffer2 = + ScopedShapedBuffer buffer2 = client->LiteralToShapedBuffer(*param2_literal, device_ordinal) .ConsumeValueOrDie(); -- GitLab From c1544d1c34dac9aa01ed2de84bc850f8d1bfe919 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Sun, 22 Apr 2018 19:08:21 -0700 Subject: [PATCH 139/434] Update tuple for cuda version with auto as it was removed in #18434. --- tensorflow/core/kernels/conv_ops_gpu.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h index 7f9cfec981..bbd5a53660 100644 --- a/tensorflow/core/kernels/conv_ops_gpu.h +++ b/tensorflow/core/kernels/conv_ops_gpu.h @@ -143,8 +143,7 @@ class ConvParameters { bool ShouldIncludeWinogradNonfusedAlgo( perftools::gputools::StreamExecutor* stream_exec) const { // Skip this check for cuDNN 7 and newer. - perftools::gputools::port::StatusOr> version = - stream_exec->AsDnn()->GetVersion(); + auto version = stream_exec->AsDnn()->GetVersion(); if (version.ok() && std::get<0>(version.ValueOrDie()) >= 7) { return true; } -- GitLab From e5cfbd0eceb4dca98b388b13acff499a5420f863 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Sun, 22 Apr 2018 20:00:54 -0700 Subject: [PATCH 140/434] Fix more for cuda version check. 
--- tensorflow/core/kernels/conv_ops_gpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h index bbd5a53660..e8da5298e6 100644 --- a/tensorflow/core/kernels/conv_ops_gpu.h +++ b/tensorflow/core/kernels/conv_ops_gpu.h @@ -144,7 +144,7 @@ class ConvParameters { perftools::gputools::StreamExecutor* stream_exec) const { // Skip this check for cuDNN 7 and newer. auto version = stream_exec->AsDnn()->GetVersion(); - if (version.ok() && std::get<0>(version.ValueOrDie()) >= 7) { + if (version.ok() && version.ValueOrDie().major_version() >= 7) { return true; } return ShouldIncludeWinogradNonfusedAlgoPreCudnn7(); -- GitLab From 734636640534cd9478a7465c3975031a089629ea Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 22 Apr 2018 22:04:22 -0700 Subject: [PATCH 141/434] Rm references to SubmodelPort PiperOrigin-RevId: 193873101 --- tensorflow/contrib/optimizer_v2/optimizer_v2.py | 15 --------------- tensorflow/python/training/optimizer.py | 15 --------------- 2 files changed, 30 deletions(-) diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py index 25d19578ea..dcb5bb6416 100644 --- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py +++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py @@ -125,19 +125,6 @@ class _DenseResourceVariableProcessor(_OptimizableVariable): return update_op -class _StreamingModelPortProcessor(_OptimizableVariable): - """Processor for streaming ModelPorts.""" - - def __init__(self, v): - self._v = v - - def target(self): - return self._v - - def update_op(self, optimizer, g, *args): - return g - - class _TensorProcessor(_OptimizableVariable): """Processor for ordinary Tensors. 
@@ -167,8 +154,6 @@ def _get_processor(v): return _DenseResourceVariableProcessor(v) if isinstance(v, variables.Variable): return _RefVariableProcessor(v) - if v.op.type == "SubmodelPort": - return _StreamingModelPortProcessor(v) if isinstance(v, ops.Tensor): return _TensorProcessor(v) raise NotImplementedError("Trying to optimize unsupported type ", v) diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index f126d3847b..66914bacf3 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -170,19 +170,6 @@ class _DenseResourceVariableProcessor(_OptimizableVariable): return update_op -class _StreamingModelPortProcessor(_OptimizableVariable): - """Processor for streaming ModelPorts.""" - - def __init__(self, v): - self._v = v - - def target(self): - return self._v - - def update_op(self, optimizer, g): - return g - - class _TensorProcessor(_OptimizableVariable): """Processor for ordinary Tensors. @@ -216,8 +203,6 @@ def _get_processor(v): return _DenseResourceVariableProcessor(v) if isinstance(v, variables.Variable): return _RefVariableProcessor(v) - if v.op.type == "SubmodelPort": - return _StreamingModelPortProcessor(v) if isinstance(v, ops.Tensor): return _TensorProcessor(v) raise NotImplementedError("Trying to optimize unsupported type ", v) -- GitLab From 97bc1d90b385d06400376ceba8a924f4982c0434 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 22 Apr 2018 22:17:13 -0700 Subject: [PATCH 142/434] Init struct bools to false to prevent warnings by dynamic type checking programs when an uninitialized value is read by operator=. 
PiperOrigin-RevId: 193873776 --- tensorflow/core/framework/collective.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/framework/collective.h b/tensorflow/core/framework/collective.h index 40d82ab0e9..0943b85fba 100644 --- a/tensorflow/core/framework/collective.h +++ b/tensorflow/core/framework/collective.h @@ -80,7 +80,7 @@ struct CollInstanceParams { // Task name prefix of corresponding device name. std::vector task_names; // True if every task has the same number of devices. - bool same_num_devices_per_task; + bool same_num_devices_per_task = false; CollImplDetails impl_details; string ToString() const; CollInstanceParams& operator=(const struct CollInstanceParams& other); @@ -99,9 +99,9 @@ struct CollectiveParams { CollInstanceParams instance; CollTaskParams task; - string name; // node name used only for log or error messages - int default_rank; // index of this op within device_names - bool is_source; // broadcast only + string name; // node name used only for log or error messages + int default_rank; // index of this op within device_names + bool is_source = false; // broadcast only // Rank of this device in each subdivision permutation. std::vector subdiv_rank; std::unique_ptr merge_op; // reduction only -- GitLab From 6d57bca02b3278e812658fe5514a2bcb17670dbe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 02:53:01 -0700 Subject: [PATCH 143/434] Fix dilated bound calculation in window util for size 0 Previously the logic calculated incorrect bounds for the case where the base bound is 0 causing issues with 0 sized base dilated convolutions. 
PiperOrigin-RevId: 193896380 --- tensorflow/compiler/xla/window_util.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/window_util.cc b/tensorflow/compiler/xla/window_util.cc index 93284b80f9..f11123ca24 100644 --- a/tensorflow/compiler/xla/window_util.cc +++ b/tensorflow/compiler/xla/window_util.cc @@ -199,6 +199,9 @@ bool IsInactiveWindowDimension(const Window& window, int64 logical_dim) { int64 DilatedBound(int64 bound, int64 dilation) { CHECK_GE(bound, 0); CHECK_GE(dilation, 1); + if (bound == 0) { + return 0; + } // Suppose the array has three entries 123 and the dilation factor is 4. Then // the dilated array has 9 entries 1xxx2xxx3. Here, each original entry except @@ -212,7 +215,7 @@ int64 StridedBound(int64 bound, int64 window_size, int64 stride) { CHECK_GE(bound, 0); CHECK_GE(stride, 1); - if (window_size > bound) { + if (bound == 0 || window_size > bound) { return 0; } -- GitLab From a821ea02afd05a96dd0e118e6ee745d472c61b3e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 06:55:23 -0700 Subject: [PATCH 144/434] Support non-equal set sizes for FID computation. PiperOrigin-RevId: 193917167 --- .../eval/python/classifier_metrics_impl.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py index 47e51415fd..d914f54945 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py @@ -488,25 +488,25 @@ def frechet_classifier_distance(real_images, The Frechet Inception distance. A floating-point scalar of the same type as the output of `classifier_fn`. 
""" - real_images_list = array_ops.split( real_images, num_or_size_splits=num_batches) generated_images_list = array_ops.split( generated_images, num_or_size_splits=num_batches) - imgs = array_ops.stack(real_images_list + generated_images_list) + real_imgs = array_ops.stack(real_images_list) + generated_imgs = array_ops.stack(generated_images_list) # Compute the activations using the memory-efficient `map_fn`. - activations = functional_ops.map_fn( - fn=classifier_fn, - elems=imgs, - parallel_iterations=1, - back_prop=False, - swap_memory=True, - name='RunClassifier') + def compute_activations(elems): + return functional_ops.map_fn(fn=classifier_fn, + elems=elems, + parallel_iterations=1, + back_prop=False, + swap_memory=True, + name='RunClassifier') - # Split the activations by the real and generated images. - real_a, gen_a = array_ops.split(activations, [num_batches, num_batches], 0) + real_a = compute_activations(real_imgs) + gen_a = compute_activations(generated_imgs) # Ensure the activations have the right shapes. real_a = array_ops.concat(array_ops.unstack(real_a), 0) @@ -697,18 +697,20 @@ def frechet_classifier_distance_from_activations(real_activations, # Compute mean and covariance matrices of activations. 
m = math_ops.reduce_mean(real_activations, 0) m_w = math_ops.reduce_mean(generated_activations, 0) - num_examples = math_ops.to_double(array_ops.shape(real_activations)[0]) + num_examples_real = math_ops.to_double(array_ops.shape(real_activations)[0]) + num_examples_generated = math_ops.to_double( + array_ops.shape(generated_activations)[0]) # sigma = (1 / (n - 1)) * (X - mu) (X - mu)^T real_centered = real_activations - m sigma = math_ops.matmul( real_centered, real_centered, transpose_a=True) / ( - num_examples - 1) + num_examples_real - 1) gen_centered = generated_activations - m_w sigma_w = math_ops.matmul( gen_centered, gen_centered, transpose_a=True) / ( - num_examples - 1) + num_examples_generated - 1) # Find the Tr(sqrt(sigma sigma_w)) component of FID sqrt_trace_component = trace_sqrt_product(sigma, sigma_w) -- GitLab From c45ffa87d3c7a74a32fcce5c9cebb2a30a2980ab Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 07:36:37 -0700 Subject: [PATCH 145/434] Automated g4 rollback of changelist 193234819 PiperOrigin-RevId: 193921660 --- .../ci_build/windows/bazel/bazel_test_lib.sh | 7 +++++ .../windows/cpu/pip/build_tf_windows.sh | 26 +++++++++++++++---- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh index d654b433e7..582188fc00 100644 --- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh +++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh @@ -140,6 +140,13 @@ function run_configure_for_gpu_build { echo "" | ./configure } +function set_gcs_remote_cache_options { + echo "build --experimental_remote_spawn_cache" >> "${TMP_BAZELRC}" + echo "build --experimental_remote_platform_override='properties:{name:\"build\" value:\"windows-x64\"}'" >> "${TMP_BAZELRC}" + echo "build --remote_http_cache=https://storage.googleapis.com/$GCS_BUCKET_NAME" >> "${TMP_BAZELRC}" + echo "build 
--google_credentials=$GOOGLE_CLOUD_CREDENTIAL" >> "${TMP_BAZELRC}" +} + function create_python_test_dir() { rm -rf "$1" mkdir -p "$1" diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 5e9ae497e1..8b7495b3b8 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -42,20 +42,36 @@ source "tensorflow/tools/ci_build/windows/bazel/common_env.sh" \ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \ || { echo "Failed to source bazel_test_lib.sh" >&2; exit 1; } +# Recreate an empty bazelrc file under source root +export TMP_BAZELRC=.tmp.bazelrc +rm -f "${TMP_BAZELRC}" +touch "${TMP_BAZELRC}" + +function cleanup { + # Remove all options in .tmp.bazelrc + echo "" > "${TMP_BAZELRC}" +} +trap cleanup EXIT + skip_test=0 for ARG in "$@"; do if [[ "$ARG" == --skip_test ]]; then skip_test=1 + elif [[ "$ARG" == --enable_gcs_remote_cache ]]; then + set_gcs_remote_cache_options fi done -run_configure_for_cpu_build - # --define=override_eigen_strong_inline=true speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc # by 20 minutes. See https://github.com/tensorflow/tensorflow/issues/10521 -BUILD_OPTS="--define=override_eigen_strong_inline=true" -bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $? +echo "build --define=override_eigen_strong_inline=true" >> "${TMP_BAZELRC}" + +echo "import %workspace%/${TMP_BAZELRC}" >> .bazelrc + +run_configure_for_cpu_build + +bazel build --announce_rc -c opt tensorflow/tools/pip_package:build_pip_package || exit $? 
if [[ "$skip_test" == 1 ]]; then exit 0 @@ -73,7 +89,7 @@ reinstall_tensorflow_pip ${PIP_NAME} # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore, # which will result testing system installed tensorflow -bazel test -c opt $BUILD_OPTS -k --test_output=errors \ +bazel test -c opt -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ --test_tag_filters=-no_pip,-no_windows,-no_oss \ --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \ -- GitLab From 9a39d4890da10545f326cf4180d758f2d7c2a3bb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 08:27:07 -0700 Subject: [PATCH 146/434] Adds functionality to subsample the inputs to extract image patches. Add functionality to subsample the extracted image patches based on the number of the outer products per entry of the covariance matrix. PiperOrigin-RevId: 193927804 --- .../kernel_tests/fisher_factors_test.py | 15 +++ tensorflow/contrib/kfac/python/ops/BUILD | 3 + .../contrib/kfac/python/ops/fisher_factors.py | 109 +++++++++++++++++- 3 files changed, 126 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py index 2a3592c53f..432b67e569 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py @@ -814,6 +814,21 @@ class ConvInputKroneckerFactorTest(ConvFactorTestCase): new_cov = sess.run(factor.make_covariance_update_op(0.)) self.assertAllClose([[(1. + 4.) 
/ 2.]], new_cov) + def testSubSample(self): + with tf_ops.Graph().as_default(): + patches_1 = array_ops.constant(1, shape=(10, 2)) + patches_2 = array_ops.constant(1, shape=(10, 8)) + patches_3 = array_ops.constant(1, shape=(3, 3)) + patches_1_sub = ff._subsample_for_cov_computation(patches_1) + patches_2_sub = ff._subsample_for_cov_computation(patches_2) + patches_3_sub = ff._subsample_for_cov_computation(patches_3) + patches_1_sub_batch_size = patches_1_sub.shape.as_list()[0] + patches_2_sub_batch_size = patches_2_sub.shape.as_list()[0] + patches_3_sub_batch_size = patches_3_sub.shape.as_list()[0] + self.assertEqual(2, patches_1_sub_batch_size) + self.assertEqual(8, patches_2_sub_batch_size) + self.assertEqual(3, patches_3_sub_batch_size) + class ConvOutputKroneckerFactorTest(ConvFactorTestCase): diff --git a/tensorflow/contrib/kfac/python/ops/BUILD b/tensorflow/contrib/kfac/python/ops/BUILD index b897fd68a0..cb0917bb85 100644 --- a/tensorflow/contrib/kfac/python/ops/BUILD +++ b/tensorflow/contrib/kfac/python/ops/BUILD @@ -37,10 +37,13 @@ py_library( deps = [ ":utils", "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:init_ops", "//tensorflow/python:linalg_ops", "//tensorflow/python:math_ops", + "//tensorflow/python:random_ops", "//tensorflow/python:special_math_ops", "//tensorflow/python:training", "//tensorflow/python:variable_scope", diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py index 0d40d265a1..b2da13db89 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py @@ -32,6 +32,7 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops 
from tensorflow.python.ops import special_math_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables @@ -55,6 +56,22 @@ EIGENVALUE_DECOMPOSITION_THRESHOLD = 2 # matrix powers. Must be nonnegative. EIGENVALUE_CLIPPING_THRESHOLD = 0.0 +# Used to subsample the flattened extracted image patches. The number of +# outer products per row of the covariance matrix should not exceed this +# value. This parameter is used only if `_SUB_SAMPLE_OUTER_PRODUCTS` is True. +_MAX_NUM_OUTER_PRODUCTS_PER_COV_ROW = 1 + +# Used to subsample the inputs passed to the extract image patches. The batch +# size of the inputs to extract image patches is multiplied by this +# factor. This parameter is used only if `_SUB_SAMPLE_INPUTS` is True. +_INPUTS_TO_EXTRACT_PATCHES_FACTOR = 0.5 + +# If True, then subsamples the tensor passed to compute the covariance matrix. +_SUB_SAMPLE_OUTER_PRODUCTS = False + +# If True, then subsamples the inputs from which image patches are extracted. +_SUB_SAMPLE_INPUTS = False + # TOWER_STRATEGY can be one of "concat" or "separate". If "concat", the data # passed to the factors from the blocks will be concatenated across towers # (lazilly via PartitionedTensor objects). 
Otherwise a tuple of tensors over @@ -67,12 +84,20 @@ def set_global_constants(init_covariances_at_zero=None, zero_debias=None, eigenvalue_decomposition_threshold=None, eigenvalue_clipping_threshold=None, + max_num_outer_products_per_cov_row=None, + sub_sample_outer_products=None, + inputs_to_extract_ptaches_factor=None, + sub_sample_inputs=None, tower_strategy=None): """Sets various global constants used by the classes in this module.""" global INIT_COVARIANCES_AT_ZERO global ZERO_DEBIAS global EIGENVALUE_DECOMPOSITION_THRESHOLD global EIGENVALUE_CLIPPING_THRESHOLD + global _MAX_NUM_OUTER_PRODUCTS_PER_COV_ROW + global _SUB_SAMPLE_OUTER_PRODUCTS + global _INPUTS_TO_EXTRACT_PATCHES_FACTOR + global _SUB_SAMPLE_INPUTS global TOWER_STRATEGY if init_covariances_at_zero is not None: @@ -83,6 +108,14 @@ def set_global_constants(init_covariances_at_zero=None, EIGENVALUE_DECOMPOSITION_THRESHOLD = eigenvalue_decomposition_threshold if eigenvalue_clipping_threshold is not None: EIGENVALUE_CLIPPING_THRESHOLD = eigenvalue_clipping_threshold + if max_num_outer_products_per_cov_row is not None: + _MAX_NUM_OUTER_PRODUCTS_PER_COV_ROW = max_num_outer_products_per_cov_row + if sub_sample_outer_products is not None: + _SUB_SAMPLE_OUTER_PRODUCTS = sub_sample_outer_products + if inputs_to_extract_ptaches_factor is not None: + _INPUTS_TO_EXTRACT_PATCHES_FACTOR = inputs_to_extract_ptaches_factor + if sub_sample_inputs is not None: + _SUB_SAMPLE_INPUTS = sub_sample_inputs if tower_strategy is not None: TOWER_STRATEGY = tower_strategy @@ -227,6 +260,58 @@ def graph_func_to_string(func): return list_to_string(func.func_id) +def _subsample_for_cov_computation(array, name=None): + """Subsamples the first dimension of the array. + + `array`(A) is a tensor of shape `[batch_size, dim_2]`. Then the covariance + matrix(A^TA) is of shape `dim_2 ** 2`. Subsample only if the number of outer + products per row of the covariance matrix is greater than + `_MAX_NUM_OUTER_PRODUCTS_PER_COV_ROW`. 
+ + Args: + array: Tensor, of shape `[batch_size, dim_2]`. + name: `string`, Default(None) + + Returns: + A tensor of shape `[max_samples, dim_2]`. + + Raises: + ValueError: If array is not matrix-shaped. + ValueError: If array's batch_size cannot be inferred. + + """ + with tf_ops.name_scope(name, "subsample", [array]): + array = tf_ops.convert_to_tensor(array) + if len(array.shape) != 2: + raise ValueError("Input param array must be a matrix.") + + batch_size = array.shape.as_list()[0] + if batch_size is None: + raise ValueError("Unable to get batch_size from input param array.") + + num_cov_rows = array.shape.as_list()[-1] + max_batch_size = int(_MAX_NUM_OUTER_PRODUCTS_PER_COV_ROW * num_cov_rows) + if batch_size <= max_batch_size: + return array + + return _random_tensor_gather(array, max_batch_size) + + +def _random_tensor_gather(array, max_size): + """Generates a random set of indices and gathers the value at the indices. + + Args: + array: Tensor, of shape `[batch_size, dim_2]`. + max_size: int, Number of indices to sample. + + Returns: + A tensor of shape `[max_size, ...]`. + """ + batch_size = array.shape.as_list()[0] + indices = random_ops.random_shuffle(math_ops.range(0, batch_size))[:max_size] + return array_ops.gather(array, indices) + + @six.add_metaclass(abc.ABCMeta) class FisherFactor(object): """Base class for objects modeling factors of approximate Fisher blocks. @@ -1153,7 +1238,9 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): dilation_rate=None, data_format=None, extract_patches_fn=None, - has_bias=False): + has_bias=False, + sub_sample_inputs=None, + sub_sample_patches=None): """Initializes ConvInputKroneckerFactor. Args: @@ -1173,6 +1260,10 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): patches. One of "extract_convolution_patches", "extract_image_patches", "extract_pointwise_conv2d_patches". has_bias: bool. If True, append 1 to in_channel. + sub_sample_inputs: `bool`. 
If True, then subsample the inputs from which + the image patches are extracted. (Default: None) + sub_sample_patches: `bool`, If `True` then subsample the extracted + patches.(Default: None) """ self._inputs = inputs self._filter_shape = filter_shape @@ -1182,7 +1273,15 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): self._data_format = data_format self._extract_patches_fn = extract_patches_fn self._has_bias = has_bias + if sub_sample_inputs is None: + self._sub_sample_inputs = _SUB_SAMPLE_INPUTS + else: + self._sub_sample_inputs = sub_sample_inputs + if sub_sample_patches is None: + self._sub_sample_patches = _SUB_SAMPLE_OUTER_PRODUCTS + else: + self._sub_sample_patches = sub_sample_patches super(ConvInputKroneckerFactor, self).__init__() @property @@ -1215,6 +1314,10 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): assert source == 0 inputs = self._inputs[tower] + if self._sub_sample_inputs: + batch_size = inputs.shape.as_list()[0] + max_size = int(batch_size * _INPUTS_TO_EXTRACT_PATCHES_FACTOR) + inputs = _random_tensor_gather(inputs, max_size) # TODO(b/64144716): there is potential here for a big savings in terms of # memory use. @@ -1260,8 +1363,12 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): # |Delta| = number of spatial offsets, and J = number of input maps # for convolutional layer l. patches_flat = array_ops.reshape(patches, [-1, flatten_size]) + # We append a homogenous coordinate to patches_flat if the layer has # bias parameters. This gives us [[A_l]]_H from the paper. + if self._sub_sample_patches: + patches_flat = _subsample_for_cov_computation(patches_flat) + if self._has_bias: patches_flat = append_homog(patches_flat) # We call compute_cov without passing in a normalizer. compute_cov uses -- GitLab From fb7ce0375c325fc948b68126082b24bb0486c6a9 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 23 Apr 2018 08:43:18 -0700 Subject: [PATCH 147/434] Internal Change PiperOrigin-RevId: 193929733 --- tensorflow/compiler/aot/test.cc | 1 + tensorflow/compiler/xla/service/backend.cc | 1 + tensorflow/compiler/xla/shape_util.h | 1 + .../xla/tests/local_client_test_base.cc | 2 +- .../factorization/kernels/clustering_ops.cc | 1 + .../contrib/ffmpeg/default/ffmpeg_lib.cc | 2 +- tensorflow/core/BUILD | 6 ++- .../core/common_runtime/direct_session.cc | 2 +- .../kernel_benchmark_testlib.cc | 1 + .../core/common_runtime/local_device.cc | 1 + .../core/common_runtime/process_util.cc | 1 + tensorflow/core/framework/bfloat16.h | 1 + tensorflow/core/grappler/clusters/utils.cc | 1 + tensorflow/core/grappler/costs/utils.cc | 2 +- tensorflow/core/grappler/devices.cc | 1 + .../grappler/optimizers/constant_folding.cc | 1 + .../adaptive_shared_batch_scheduler.h | 1 + .../batching_util/shared_batch_scheduler.h | 1 + tensorflow/core/kernels/cast_op.h | 2 +- tensorflow/core/kernels/decode_raw_op.cc | 2 +- .../core/kernels/mkl_input_conversion_op.cc | 1 + tensorflow/core/kernels/mkl_tfconv_op.h | 1 + tensorflow/core/kernels/sparse_matmul_op.h | 1 + tensorflow/core/lib/bfloat16/bfloat16.h | 3 +- tensorflow/core/lib/core/coding.cc | 2 +- tensorflow/core/lib/core/raw_coding.h | 2 +- tensorflow/core/lib/gtl/inlined_vector.h | 2 +- tensorflow/core/lib/png/png_io.cc | 2 +- tensorflow/core/lib/wav/wav_io.cc | 2 +- tensorflow/core/platform/byte_order.h | 37 +++++++++++++++++++ tensorflow/core/platform/cpu_feature_guard.cc | 1 + tensorflow/core/platform/cpu_info.h | 7 ++-- tensorflow/core/platform/denormal.cc | 3 +- tensorflow/core/platform/windows/cpu_info.h | 9 ----- 34 files changed, 76 insertions(+), 28 deletions(-) create mode 100644 tensorflow/core/platform/byte_order.h diff --git a/tensorflow/compiler/aot/test.cc b/tensorflow/compiler/aot/test.cc index 47ef5f82cb..6b098049cb 100644 --- a/tensorflow/compiler/aot/test.cc +++ b/tensorflow/compiler/aot/test.cc @@ 
-35,6 +35,7 @@ limitations under the License. // clang-format on #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc index a582dbffd6..b1d616ec35 100644 --- a/tensorflow/compiler/xla/service/backend.cc +++ b/tensorflow/compiler/xla/service/backend.cc @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/eigen_thread_pool.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 63da9154cf..5fa728e7c2 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/gtl/optional.h" +#include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc index bb5aabb214..b615f0fead 100644 --- a/tensorflow/compiler/xla/tests/local_client_test_base.cc +++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc @@ -27,7 +27,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/core/common_runtime/eigen_thread_pool.h" #include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/contrib/factorization/kernels/clustering_ops.cc b/tensorflow/contrib/factorization/kernels/clustering_ops.cc index 2a6c97e8b9..025534d540 100644 --- a/tensorflow/contrib/factorization/kernels/clustering_ops.cc +++ b/tensorflow/contrib/factorization/kernels/clustering_ops.cc @@ -32,6 +32,7 @@ #include "tensorflow/core/lib/gtl/top_n.h" #include "tensorflow/core/lib/random/philox_random.h" #include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc index 35341406a0..cca1a05419 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc @@ -28,7 +28,7 @@ #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" -#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/env.h" using tensorflow::strings::StrCat; diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 5b04574a4f..a2ff29724b 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -271,7 +271,7 @@ PLATFORM_BASE_HDRS = [ "platform/logging.h", "platform/macros.h", "platform/types.h", - "platform/cpu_info.h", + "platform/byte_order.h", ] PLATFORM_OTHER_HDRS = [ @@ -279,6 +279,7 @@ PLATFORM_OTHER_HDRS = [ "platform/stacktrace.h", "platform/stacktrace_handler.h", 
"platform/context.h", + "platform/cpu_info.h", "platform/cpu_feature_guard.h", "platform/dynamic_annotations.h", "platform/env.h", @@ -307,7 +308,6 @@ cc_library( srcs = glob([ "platform/*/integral_types.h", "platform/*/logging.h", - "platform/*/cpu_info.h", ]), hdrs = PLATFORM_BASE_HDRS, deps = [ @@ -658,6 +658,7 @@ cc_library( "framework/tensor_types.h", "framework/type_traits.h", "lib/bfloat16/bfloat16.h", + "platform/byte_order.h", "platform/default/dynamic_annotations.h", "platform/default/integral_types.h", "platform/default/logging.h", @@ -1903,6 +1904,7 @@ cc_library( "lib/core/casts.h", "lib/core/stringpiece.h", "lib/png/png_io.h", + "platform/byte_order.h", "platform/cpu_info.h", "platform/default/integral_types.h", "platform/default/logging.h", diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 0479061daf..0afbd02e86 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -54,7 +54,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" -#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/device_tracer.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" diff --git a/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc b/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc index 64d8849475..7de1b80e2d 100644 --- a/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc +++ b/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc @@ -28,6 +28,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/notification.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test_benchmark.h" diff --git a/tensorflow/core/common_runtime/local_device.cc b/tensorflow/core/common_runtime/local_device.cc index ca7f1614f1..873182371e 100644 --- a/tensorflow/core/common_runtime/local_device.cc +++ b/tensorflow/core/common_runtime/local_device.cc @@ -19,6 +19,7 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/eigen_thread_pool.h" #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_feature_guard.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc index 22fd940d82..f8f3a1ecd7 100644 --- a/tensorflow/core/common_runtime/process_util.cc +++ b/tensorflow/core/common_runtime/process_util.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/tracing.h" diff --git a/tensorflow/core/framework/bfloat16.h b/tensorflow/core/framework/bfloat16.h index 968c18bdd2..2f79d0fa70 100644 --- a/tensorflow/core/framework/bfloat16.h +++ b/tensorflow/core/framework/bfloat16.h @@ -17,6 +17,7 @@ limitations under the License. 
#define TENSORFLOW_FRAMEWORK_BFLOAT16_H_ #include "tensorflow/core/framework/numeric_types.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/types.h" #if defined(PLATFORM_WINDOWS) diff --git a/tensorflow/core/grappler/clusters/utils.cc b/tensorflow/core/grappler/clusters/utils.cc index 50d6e6468f..a7519725a5 100644 --- a/tensorflow/core/grappler/clusters/utils.cc +++ b/tensorflow/core/grappler/clusters/utils.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/mem.h" diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index f318e3911c..be54d98534 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -44,7 +44,7 @@ limitations under the License. #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" -#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" diff --git a/tensorflow/core/grappler/devices.cc b/tensorflow/core/grappler/devices.cc index b318ac22d4..2be894a08b 100644 --- a/tensorflow/core/grappler/devices.cc +++ b/tensorflow/core/grappler/devices.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include #include "tensorflow/core/grappler/devices.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #if GOOGLE_CUDA diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index e29aaa25fe..45bb188e8d 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/setround.h" diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h index 339d792302..f5ced95feb 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/thread_annotations.h" diff --git a/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h index b77289aded..edc88a0384 100644 --- a/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h @@ -30,6 +30,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/thread_annotations.h" diff --git a/tensorflow/core/kernels/cast_op.h b/tensorflow/core/kernels/cast_op.h index fd4e75d26f..16d2e0e0a5 100644 --- a/tensorflow/core/kernels/cast_op.h +++ b/tensorflow/core/kernels/cast_op.h @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/decode_raw_op.cc b/tensorflow/core/kernels/decode_raw_op.cc index bacacb94ae..eaef5a6097 100644 --- a/tensorflow/core/kernels/decode_raw_op.cc +++ b/tensorflow/core/kernels/decode_raw_op.cc @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc index dcf6bb9f74..ea763ce85b 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/tensor_format.h" diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h index ddea9e281b..4120f013ac 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl_tfconv_op.h @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/tensor_format.h" diff --git a/tensorflow/core/kernels/sparse_matmul_op.h b/tensorflow/core/kernels/sparse_matmul_op.h index 14ef2ed704..e89280724e 100644 --- a/tensorflow/core/kernels/sparse_matmul_op.h +++ b/tensorflow/core/kernels/sparse_matmul_op.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_KERNELS_SPARSE_MATMUL_OP_H_ #include "third_party/eigen3/Eigen/Core" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/types.h" #if defined(PLATFORM_WINDOWS) diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h index 126e5a17af..e7c24387a4 100644 --- a/tensorflow/core/lib/bfloat16/bfloat16.h +++ b/tensorflow/core/lib/bfloat16/bfloat16.h @@ -19,8 +19,7 @@ limitations under the License. #include #include -// We need cpu_info.h here in order to pick up __BYTE_ORDER__. 
-#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" #ifdef __CUDACC__ // All functions callable from CUDA code must be qualified with __device__ diff --git a/tensorflow/core/lib/core/coding.cc b/tensorflow/core/lib/core/coding.cc index bb95c27410..50872eef83 100644 --- a/tensorflow/core/lib/core/coding.cc +++ b/tensorflow/core/lib/core/coding.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/core/lib/core/coding.h" -#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" namespace tensorflow { namespace core { diff --git a/tensorflow/core/lib/core/raw_coding.h b/tensorflow/core/lib/core/raw_coding.h index bbfd33d303..37201b755d 100644 --- a/tensorflow/core/lib/core/raw_coding.h +++ b/tensorflow/core/lib/core/raw_coding.h @@ -17,7 +17,7 @@ limitations under the License. #define TENSORFLOW_LIB_CORE_RAW_CODING_H_ #include -#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { diff --git a/tensorflow/core/lib/gtl/inlined_vector.h b/tensorflow/core/lib/gtl/inlined_vector.h index 6e3cb2206d..2011f7d4a1 100644 --- a/tensorflow/core/lib/gtl/inlined_vector.h +++ b/tensorflow/core/lib/gtl/inlined_vector.h @@ -43,7 +43,7 @@ limitations under the License. #include #include "tensorflow/core/lib/gtl/manual_constructor.h" -#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/lib/png/png_io.cc b/tensorflow/core/lib/png/png_io.cc index cba473927d..62c803afb2 100644 --- a/tensorflow/core/lib/png/png_io.cc +++ b/tensorflow/core/lib/png/png_io.cc @@ -26,7 +26,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/lib/png/png_io.h" -#include "tensorflow/core/platform/cpu_info.h" // endian +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/png.h" diff --git a/tensorflow/core/lib/wav/wav_io.cc b/tensorflow/core/lib/wav/wav_io.cc index 51b9c6cd82..3f7dbcee85 100644 --- a/tensorflow/core/lib/wav/wav_io.cc +++ b/tensorflow/core/lib/wav/wav_io.cc @@ -23,7 +23,7 @@ limitations under the License. #include "tensorflow/core/lib/core/coding.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/wav/wav_io.h" -#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/platform/byte_order.h b/tensorflow/core/platform/byte_order.h new file mode 100644 index 0000000000..aab6535e4b --- /dev/null +++ b/tensorflow/core/platform/byte_order.h @@ -0,0 +1,37 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_BYTE_ORDER_H_ +#define TENSORFLOW_CORE_PLATFORM_BYTE_ORDER_H_ + +// Byte order defines provided by gcc. MSVC doesn't define those so +// we define them here. 
+// We assume that all windows platform out there are little endian. +#if defined(_MSC_VER) && !defined(__clang__) +#define __ORDER_LITTLE_ENDIAN__ 0x4d2 +#define __ORDER_BIG_ENDIAN__ 0x10e1 +#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ +#endif + +namespace tensorflow { +namespace port { + +// TODO(jeff,sanjay): Make portable +constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; + +} // namespace port +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PLATFORM_BYTE_ORDER_H_ diff --git a/tensorflow/core/platform/cpu_feature_guard.cc b/tensorflow/core/platform/cpu_feature_guard.cc index b570658158..9d00aa7b7f 100644 --- a/tensorflow/core/platform/cpu_feature_guard.cc +++ b/tensorflow/core/platform/cpu_feature_guard.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h index bb77650e26..b5be7e8b54 100644 --- a/tensorflow/core/platform/cpu_info.h +++ b/tensorflow/core/platform/cpu_info.h @@ -18,6 +18,10 @@ limitations under the License. #include +// TODO(ahentz): This is not strictly required here but, for historical +// reasons, many people depend on cpu_info.h in order to use kLittleEndian. +#include "tensorflow/core/platform/byte_order.h" + #if defined(_MSC_VER) #include "tensorflow/core/platform/windows/cpu_info.h" #endif @@ -25,9 +29,6 @@ limitations under the License. namespace tensorflow { namespace port { -// TODO(jeff,sanjay): Make portable -constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; - // Returns an estimate of the number of schedulable CPUs for this // process. 
Usually, it's constant throughout the lifetime of a // process, but it might change if the underlying cluster management diff --git a/tensorflow/core/platform/denormal.cc b/tensorflow/core/platform/denormal.cc index 82cbc43b4f..c510dc204f 100644 --- a/tensorflow/core/platform/denormal.cc +++ b/tensorflow/core/platform/denormal.cc @@ -15,8 +15,9 @@ limitations under the License. #include -#include "tensorflow/core/platform/denormal.h" +#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/platform.h" // If we're on gcc 4.8 or older, there's a known bug that prevents the use of diff --git a/tensorflow/core/platform/windows/cpu_info.h b/tensorflow/core/platform/windows/cpu_info.h index f20939d3c0..ba2126abcf 100644 --- a/tensorflow/core/platform/windows/cpu_info.h +++ b/tensorflow/core/platform/windows/cpu_info.h @@ -19,13 +19,4 @@ limitations under the License. // included so __cpuidex function is available for GETCPUID on Windows #include -// Byte order defines provided by gcc. MSVC doesn't define those so -// we define them here. -// We assume that all windows platform out there are little endian. 
-#if defined(_MSC_VER) && !defined(__clang__) -#define __ORDER_LITTLE_ENDIAN__ 0x4d2 -#define __ORDER_BIG_ENDIAN__ 0x10e1 -#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ -#endif - #endif // TENSORFLOW_PLATFORM_WINDOWS_CPU_INFO_H_ -- GitLab From 26ff316f49e613a7f9cba02dd5e7d6cd5aa78623 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Mon, 23 Apr 2018 11:03:13 -0700 Subject: [PATCH 148/434] Fix flaky stateful metrics test PiperOrigin-RevId: 193951580 --- .../keras/_impl/keras/engine/network.py | 2 +- .../python/keras/_impl/keras/metrics_test.py | 129 +++++++++--------- 2 files changed, 66 insertions(+), 65 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index cc177c14a8..3b419dff3a 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -126,7 +126,7 @@ class Network(base_layer.Layer): else: self.outputs = [outputs] - # User-prodived argument validation. + # User-provided argument validation. if context.executing_eagerly(): # Check that all inputs/outputs are DeferredTensors. for tensor in self.inputs: diff --git a/tensorflow/python/keras/_impl/keras/metrics_test.py b/tensorflow/python/keras/_impl/keras/metrics_test.py index 9deaab0c05..13cef97812 100644 --- a/tensorflow/python/keras/_impl/keras/metrics_test.py +++ b/tensorflow/python/keras/_impl/keras/metrics_test.py @@ -75,74 +75,75 @@ class KerasMetricsTest(test.TestCase): self.assertEqual(result, 0.) def test_stateful_metrics(self): - np.random.seed(1334) - - class BinaryTruePositives(keras.layers.Layer): - """Stateful Metric to count the total true positives over all batches. - - Assumes predictions and targets of shape `(samples, 1)`. - - Arguments: - threshold: Float, lower limit on prediction value that counts as a - positive class prediction. - name: String, name for the metric. 
- """ - - def __init__(self, name='true_positives', **kwargs): - super(BinaryTruePositives, self).__init__(name=name, **kwargs) - self.true_positives = keras.backend.variable(value=0, dtype='int32') - - def reset_states(self): - keras.backend.set_value(self.true_positives, 0) + with self.test_session(): + np.random.seed(1334) - def __call__(self, y_true, y_pred): - """Computes the number of true positives in a batch. + class BinaryTruePositives(keras.layers.Layer): + """Stateful Metric to count the total true positives over all batches. - Args: - y_true: Tensor, batch_wise labels - y_pred: Tensor, batch_wise predictions + Assumes predictions and targets of shape `(samples, 1)`. - Returns: - The total number of true positives seen this epoch at the - completion of the batch. + Arguments: + threshold: Float, lower limit on prediction value that counts as a + positive class prediction. + name: String, name for the metric. """ - y_true = math_ops.cast(y_true, 'int32') - y_pred = math_ops.cast(math_ops.round(y_pred), 'int32') - correct_preds = math_ops.cast(math_ops.equal(y_pred, y_true), 'int32') - true_pos = math_ops.cast( - math_ops.reduce_sum(correct_preds * y_true), 'int32') - current_true_pos = self.true_positives * 1 - self.add_update( - state_ops.assign_add(self.true_positives, true_pos), - inputs=[y_true, y_pred]) - return current_true_pos + true_pos - - metric_fn = BinaryTruePositives() - config = keras.metrics.serialize(metric_fn) - metric_fn = keras.metrics.deserialize( - config, custom_objects={'BinaryTruePositives': BinaryTruePositives}) - - # Test on simple model - inputs = keras.Input(shape=(2,)) - outputs = keras.layers.Dense(1, activation='sigmoid')(inputs) - model = keras.Model(inputs, outputs) - model.compile(optimizer='sgd', - loss='binary_crossentropy', - metrics=['acc', metric_fn]) - - # Test fit, evaluate - samples = 1000 - x = np.random.random((samples, 2)) - y = np.random.randint(2, size=(samples, 1)) - model.fit(x, y, epochs=1, batch_size=10) 
- outs = model.evaluate(x, y, batch_size=10) - preds = model.predict(x) - - def ref_true_pos(y_true, y_pred): - return np.sum(np.logical_and(y_pred > 0.5, y_true == 1)) - - # Test correctness (e.g. updates should have been run) - self.assertAllClose(outs[2], ref_true_pos(y, preds), atol=1e-5) + + def __init__(self, name='true_positives', **kwargs): + super(BinaryTruePositives, self).__init__(name=name, **kwargs) + self.true_positives = keras.backend.variable(value=0, dtype='int32') + + def reset_states(self): + keras.backend.set_value(self.true_positives, 0) + + def __call__(self, y_true, y_pred): + """Computes the number of true positives in a batch. + + Args: + y_true: Tensor, batch_wise labels + y_pred: Tensor, batch_wise predictions + + Returns: + The total number of true positives seen this epoch at the + completion of the batch. + """ + y_true = math_ops.cast(y_true, 'int32') + y_pred = math_ops.cast(math_ops.round(y_pred), 'int32') + correct_preds = math_ops.cast(math_ops.equal(y_pred, y_true), 'int32') + true_pos = math_ops.cast( + math_ops.reduce_sum(correct_preds * y_true), 'int32') + current_true_pos = self.true_positives * 1 + self.add_update( + state_ops.assign_add(self.true_positives, true_pos), + inputs=[y_true, y_pred]) + return current_true_pos + true_pos + + metric_fn = BinaryTruePositives() + config = keras.metrics.serialize(metric_fn) + metric_fn = keras.metrics.deserialize( + config, custom_objects={'BinaryTruePositives': BinaryTruePositives}) + + # Test on simple model + inputs = keras.Input(shape=(2,)) + outputs = keras.layers.Dense(1, activation='sigmoid')(inputs) + model = keras.Model(inputs, outputs) + model.compile(optimizer='sgd', + loss='binary_crossentropy', + metrics=['acc', metric_fn]) + + # Test fit, evaluate + samples = 1000 + x = np.random.random((samples, 2)) + y = np.random.randint(2, size=(samples, 1)) + model.fit(x, y, epochs=1, batch_size=10) + outs = model.evaluate(x, y, batch_size=10) + preds = model.predict(x) + + def 
ref_true_pos(y_true, y_pred): + return np.sum(np.logical_and(y_pred > 0.5, y_true == 1)) + + # Test correctness (e.g. updates should have been run) + self.assertAllClose(outs[2], ref_true_pos(y, preds), atol=1e-5) if __name__ == '__main__': -- GitLab From f0d5d2047833c7221ce3be1690689ca1c6658add Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 11:23:01 -0700 Subject: [PATCH 149/434] Convert int -> size_t so that implicit conversion doesn't lose integer precision. PiperOrigin-RevId: 193955175 --- tensorflow/contrib/lite/context.h | 6 +++--- tensorflow/contrib/lite/interpreter.cc | 13 +++++++++---- tensorflow/contrib/lite/interpreter.h | 12 ++++++------ tensorflow/contrib/lite/interpreter_test.cc | 8 ++++---- tensorflow/contrib/lite/optional_debug_tools.cc | 2 +- 5 files changed, 23 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h index 0b38f43cd3..12841d233c 100644 --- a/tensorflow/contrib/lite/context.h +++ b/tensorflow/contrib/lite/context.h @@ -275,7 +275,7 @@ typedef struct { typedef struct TfLiteContext { // Number of tensors in the context. - int tensors_size; + size_t tensors_size; // The execution plan contains a list of the node indices in execution // order. execution_plan->size is the current number of nodes. And, @@ -397,13 +397,13 @@ typedef struct _TfLiteDelegate { // This can be null if the delegate doesn't use its own buffer. TfLiteStatus (*CopyFromBufferHandle)(TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, - void* data, int size); + void* data, size_t size); // Copy the data from raw memory to delegate buffer handle. // This can be null if the delegate doesn't use its own buffer. TfLiteStatus (*CopyToBufferHandle)(TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, - void* data, int size); + void* data, size_t size); // Free the Delegate Buffer Handle. 
Note: This only frees the handle, but // this doesn't release the underlying resource (e.g. textures). The diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 91b6c414bf..9d8ea55fd1 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -308,7 +308,12 @@ TfLiteStatus Interpreter::CheckTensorIndices(const char* label, for (int i = 0; i < length; i++) { int index = indices[i]; - if (index < kOptionalTensor || index >= context_.tensors_size) { + // Continue if index == kOptionalTensor before additional comparisons below, + // size_t(-1) is always >= context_tensors_size. + if (index == kOptionalTensor) { + continue; + } + if (index < 0 || static_cast(index) >= context_.tensors_size) { ReportError(&context_, "Invalid tensor index %d in %s\n", index, label); consistent_ = false; return kTfLiteError; @@ -318,7 +323,7 @@ TfLiteStatus Interpreter::CheckTensorIndices(const char* label, } TfLiteStatus Interpreter::BytesRequired(TfLiteType type, const int* dims, - int dims_size, size_t* bytes) { + size_t dims_size, size_t* bytes) { // TODO(aselle): Check for overflow here using overflow.h in TensorFlow // MultiplyWithoutOverflow. TF_LITE_ENSURE(&context_, bytes != nullptr); @@ -645,7 +650,7 @@ TfLiteStatus Interpreter::GetNodeAndRegistration( } TfLiteStatus Interpreter::SetTensorParametersReadOnly( - int tensor_index, TfLiteType type, const char* name, const int rank, + int tensor_index, TfLiteType type, const char* name, const size_t rank, const int* dims, TfLiteQuantizationParams quantization, const char* buffer, size_t bytes, const Allocation* allocation) { if (state_ == kStateInvokableAndImmutable) { @@ -691,7 +696,7 @@ TfLiteStatus Interpreter::SetTensorParametersReadOnly( // bytes. The lifetime of buffer must be ensured to be greater or equal // to Interpreter. 
TfLiteStatus Interpreter::SetTensorParametersReadWrite( - int tensor_index, TfLiteType type, const char* name, const int rank, + int tensor_index, TfLiteType type, const char* name, const size_t rank, const int* dims, TfLiteQuantizationParams quantization) { if (state_ == kStateInvokableAndImmutable) { ReportError( diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index a49134b95e..6f3433abcf 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -150,7 +150,7 @@ class Interpreter { }; TfLiteStatus SetTensorParametersReadOnly( - int tensor_index, TfLiteType type, const char* name, const int rank, + int tensor_index, TfLiteType type, const char* name, const size_t rank, const int* dims, TfLiteQuantizationParams quantization, const char* buffer, size_t bytes, const Allocation* allocation = nullptr); @@ -165,7 +165,7 @@ class Interpreter { dims.data(), quantization); } TfLiteStatus SetTensorParametersReadWrite( - int tensor_index, TfLiteType type, const char* name, const int rank, + int tensor_index, TfLiteType type, const char* name, const size_t rank, const int* dims, TfLiteQuantizationParams quantization); // Functions to access tensor data @@ -189,10 +189,10 @@ class Interpreter { } // Return the number of tensors in the model. - int tensors_size() const { return context_.tensors_size; } + size_t tensors_size() const { return context_.tensors_size; } // Return the number of ops in the model. - int nodes_size() const { return nodes_and_registration_.size(); } + size_t nodes_size() const { return nodes_and_registration_.size(); } // WARNING: Experimental interface, subject to change const std::vector& execution_plan() const { return execution_plan_; } @@ -406,7 +406,7 @@ class Interpreter { // Compute the number of bytes required to represent a tensor with dimensions // specified by the array dims (of length dims_size). Returns the status code // and bytes. 
- TfLiteStatus BytesRequired(TfLiteType type, const int* dims, int dims_size, + TfLiteStatus BytesRequired(TfLiteType type, const int* dims, size_t dims_size, size_t* bytes); // Request an tensor be resized implementation. If the given tensor is of @@ -467,7 +467,7 @@ class Interpreter { // tensors. After calling this function, adding `kTensorsCapacityHeadroom` // more tensors won't invalidate the pointer to existing tensors. void EnsureTensorsVectorCapacity() { - const int required_capacity = tensors_size() + kTensorsCapacityHeadroom; + const size_t required_capacity = tensors_size() + kTensorsCapacityHeadroom; if (required_capacity > tensors_.capacity()) { tensors_.reserve(required_capacity); context_.tensors = tensors_.data(); diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 131e088079..453c1ada1c 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -887,15 +887,15 @@ class TestDelegate : public ::testing::Test { TfLiteIntArrayFree(nodes_to_separate); return kTfLiteOk; }; - delegate_.CopyToBufferHandle = [](TfLiteDelegate* delegate, - TfLiteBufferHandle buffer_handle, - void* data, int size) -> TfLiteStatus { + delegate_.CopyToBufferHandle = + [](TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, + void* data, size_t size) -> TfLiteStatus { // TODO(ycling): Implement tests to test buffer copying logic. return kTfLiteOk; }; delegate_.CopyFromBufferHandle = [](TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, - void* data, int size) -> TfLiteStatus { + void* data, size_t size) -> TfLiteStatus { // TODO(ycling): Implement tests to test buffer copying logic. 
return kTfLiteOk; }; diff --git a/tensorflow/contrib/lite/optional_debug_tools.cc b/tensorflow/contrib/lite/optional_debug_tools.cc index e1366639c7..e0a0910117 100644 --- a/tensorflow/contrib/lite/optional_debug_tools.cc +++ b/tensorflow/contrib/lite/optional_debug_tools.cc @@ -72,7 +72,7 @@ const char* AllocTypeName(TfLiteAllocationType type) { // Prints a dump of what tensors and what nodes are in the interpreter. void PrintInterpreterState(Interpreter* interpreter) { - printf("Interpreter has %d tensors and %d nodes\n", + printf("Interpreter has %lu tensors and %lu nodes\n", interpreter->tensors_size(), interpreter->nodes_size()); printf("Inputs:"); PrintIntVector(interpreter->inputs()); -- GitLab From 829ec055afdfca3424030794c469d19290df13fe Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Mon, 23 Apr 2018 11:44:22 -0700 Subject: [PATCH 150/434] Update resources.h --- .../core/kernels/boosted_trees/resources.h | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/tensorflow/core/kernels/boosted_trees/resources.h b/tensorflow/core/kernels/boosted_trees/resources.h index ef42604897..df78d3f275 100644 --- a/tensorflow/core/kernels/boosted_trees/resources.h +++ b/tensorflow/core/kernels/boosted_trees/resources.h @@ -82,26 +82,6 @@ class BoostedTreesEnsembleResource : public StampedResource { int64 GetNumNodes(const int32 tree_id); - void UpdateLastLayerNodesRange(const int32 node_range_start, - int32 node_range_end) const { - tree_ensemble_->mutable_growing_metadata()->set_last_layer_node_start( - node_range_start); - tree_ensemble_->mutable_growing_metadata()->set_last_layer_node_end( - node_range_end); - } - - void GetLastLayerNodesRange(int32* node_range_start, - int32* node_range_end) const { - *node_range_start = - tree_ensemble_->growing_metadata().last_layer_node_start(); - *node_range_end = tree_ensemble_->growing_metadata().last_layer_node_end(); - } - - int64 GetNumNodes(const int32 tree_id) { - 
DCHECK_LT(tree_id, tree_ensemble_->trees_size()); - return tree_ensemble_->trees(tree_id).nodes_size(); - } - void UpdateGrowingMetadata() const; int32 GetNumLayersAttempted(); -- GitLab From d93e09fbd3408f6ee1647addfdca1eef00139223 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 11:42:02 -0700 Subject: [PATCH 151/434] Add fast shuffled fully-connected path also for the case where the RHS has 4 columns (so far was only for the GEMV case where RHS has 1 column). Also pre-shuffle / pre-xor the input activations, not just the weights. We need a buffer for that, so the shuffled FullyConnected operator gets a second output acting as its workspace, similar to what we have been doing for Conv operators needed a im2col workspace buffer. PiperOrigin-RevId: 193958461 --- .../internal/optimized/optimized_ops.h | 448 +++++++++++++----- .../internal/reference/reference_ops.h | 155 ++++-- .../experimental_shuffle_fc_weights.cc | 27 +- tensorflow/contrib/lite/toco/tooling_util.cc | 15 +- 4 files changed, 483 insertions(+), 162 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 2e2721e093..49ce1133d3 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1209,109 +1209,275 @@ void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, // as the 'task' for worker threads to run (multi-threaded case, see // ExperimentalShuffledFullyConnectedWorkerTask below). 
inline void ExperimentalShuffledFullyConnectedWorkerImpl( - const uint8* input_data, const int8* shuffled_weights_data, - int output_depth, int accum_depth, const int32* bias_data, + const uint8* shuffled_input_workspace_data, + const int8* shuffled_weights_data, int batches, int output_depth, + int output_stride, int accum_depth, const int32* bias_data, int32 output_multiplier, int output_shift, int16* output_data) { - const int8* shuffled_weights_ptr = shuffled_weights_data; #if defined USE_NEON - // We'll only need to xor signbit to the input activation values, as - // that xor-ing is pre-built into the shuffled weights values. - const uint8x16_t signbit = vdupq_n_u8(0x80); - const int right_shift = output_shift > 0 ? output_shift : 0; - const int left_shift = output_shift > 0 ? 0 : -output_shift; - for (int c = 0; c < output_depth; c += 4) { - // Accumulation loop. - int32x4_t row_accum0 = vdupq_n_s32(0); - int32x4_t row_accum1 = vdupq_n_s32(0); - int32x4_t row_accum2 = vdupq_n_s32(0); - int32x4_t row_accum3 = vdupq_n_s32(0); - for (int d = 0; d < accum_depth; d += 16) { - int8x16_t weights0 = vld1q_s8(shuffled_weights_ptr + 0); - int8x16_t weights1 = vld1q_s8(shuffled_weights_ptr + 16); - int8x16_t weights2 = vld1q_s8(shuffled_weights_ptr + 32); - int8x16_t weights3 = vld1q_s8(shuffled_weights_ptr + 48); - shuffled_weights_ptr += 64; - int8x16_t input = - vreinterpretq_s8_u8(veorq_u8(signbit, vld1q_u8(input_data + d))); - int16x8_t local_accum0 = - vmull_s8(vget_low_s8(weights0), vget_low_s8(input)); - int16x8_t local_accum1 = - vmull_s8(vget_low_s8(weights1), vget_low_s8(input)); - int16x8_t local_accum2 = - vmull_s8(vget_low_s8(weights2), vget_low_s8(input)); - int16x8_t local_accum3 = - vmull_s8(vget_low_s8(weights3), vget_low_s8(input)); - local_accum0 = - vmlal_s8(local_accum0, vget_high_s8(weights0), vget_high_s8(input)); - local_accum1 = - vmlal_s8(local_accum1, vget_high_s8(weights1), vget_high_s8(input)); - local_accum2 = - vmlal_s8(local_accum2, 
vget_high_s8(weights2), vget_high_s8(input)); - local_accum3 = - vmlal_s8(local_accum3, vget_high_s8(weights3), vget_high_s8(input)); - row_accum0 = vpadalq_s16(row_accum0, local_accum0); - row_accum1 = vpadalq_s16(row_accum1, local_accum1); - row_accum2 = vpadalq_s16(row_accum2, local_accum2); - row_accum3 = vpadalq_s16(row_accum3, local_accum3); + const int8* shuffled_weights_ptr = shuffled_weights_data; + if (batches == 1) { + const int right_shift = output_shift > 0 ? output_shift : 0; + const int left_shift = output_shift > 0 ? 0 : -output_shift; + for (int c = 0; c < output_depth; c += 4) { + // Accumulation loop. + int32x4_t row_accum0 = vdupq_n_s32(0); + int32x4_t row_accum1 = vdupq_n_s32(0); + int32x4_t row_accum2 = vdupq_n_s32(0); + int32x4_t row_accum3 = vdupq_n_s32(0); + for (int d = 0; d < accum_depth; d += 16) { + int8x16_t weights0 = vld1q_s8(shuffled_weights_ptr + 0); + int8x16_t weights1 = vld1q_s8(shuffled_weights_ptr + 16); + int8x16_t weights2 = vld1q_s8(shuffled_weights_ptr + 32); + int8x16_t weights3 = vld1q_s8(shuffled_weights_ptr + 48); + shuffled_weights_ptr += 64; + int8x16_t input = + vreinterpretq_s8_u8(vld1q_u8(shuffled_input_workspace_data + d)); + int16x8_t local_accum0 = + vmull_s8(vget_low_s8(weights0), vget_low_s8(input)); + int16x8_t local_accum1 = + vmull_s8(vget_low_s8(weights1), vget_low_s8(input)); + int16x8_t local_accum2 = + vmull_s8(vget_low_s8(weights2), vget_low_s8(input)); + int16x8_t local_accum3 = + vmull_s8(vget_low_s8(weights3), vget_low_s8(input)); + local_accum0 = + vmlal_s8(local_accum0, vget_high_s8(weights0), vget_high_s8(input)); + local_accum1 = + vmlal_s8(local_accum1, vget_high_s8(weights1), vget_high_s8(input)); + local_accum2 = + vmlal_s8(local_accum2, vget_high_s8(weights2), vget_high_s8(input)); + local_accum3 = + vmlal_s8(local_accum3, vget_high_s8(weights3), vget_high_s8(input)); + row_accum0 = vpadalq_s16(row_accum0, local_accum0); + row_accum1 = vpadalq_s16(row_accum1, local_accum1); + row_accum2 = 
vpadalq_s16(row_accum2, local_accum2); + row_accum3 = vpadalq_s16(row_accum3, local_accum3); + } + // Horizontally reduce accumulators + int32x2_t pairwise_reduced_acc_0, pairwise_reduced_acc_1, + pairwise_reduced_acc_2, pairwise_reduced_acc_3; + pairwise_reduced_acc_0 = + vpadd_s32(vget_low_s32(row_accum0), vget_high_s32(row_accum0)); + pairwise_reduced_acc_1 = + vpadd_s32(vget_low_s32(row_accum1), vget_high_s32(row_accum1)); + pairwise_reduced_acc_2 = + vpadd_s32(vget_low_s32(row_accum2), vget_high_s32(row_accum2)); + pairwise_reduced_acc_3 = + vpadd_s32(vget_low_s32(row_accum3), vget_high_s32(row_accum3)); + const int32x2_t reduced_lo = + vpadd_s32(pairwise_reduced_acc_0, pairwise_reduced_acc_1); + const int32x2_t reduced_hi = + vpadd_s32(pairwise_reduced_acc_2, pairwise_reduced_acc_3); + int32x4_t reduced = vcombine_s32(reduced_lo, reduced_hi); + // Add bias values. + int32x4_t bias_vec = vld1q_s32(bias_data + c); + reduced = vaddq_s32(reduced, bias_vec); + reduced = vshlq_s32(reduced, vdupq_n_s32(left_shift)); + // Multiply by the fixed-point multiplier. + reduced = vqrdmulhq_n_s32(reduced, output_multiplier); + // Rounding-shift-right. + using gemmlowp::RoundingDivideByPOT; + reduced = RoundingDivideByPOT(reduced, right_shift); + // Narrow values down to 16 bit signed. 
+ const int16x4_t res16 = vqmovn_s32(reduced); + vst1_s16(output_data + c, res16); } - // Horizontally reduce accumulators - int32x2_t pairwise_reduced_acc_0, pairwise_reduced_acc_1, - pairwise_reduced_acc_2, pairwise_reduced_acc_3; - pairwise_reduced_acc_0 = - vpadd_s32(vget_low_s32(row_accum0), vget_high_s32(row_accum0)); - pairwise_reduced_acc_1 = - vpadd_s32(vget_low_s32(row_accum1), vget_high_s32(row_accum1)); - pairwise_reduced_acc_2 = - vpadd_s32(vget_low_s32(row_accum2), vget_high_s32(row_accum2)); - pairwise_reduced_acc_3 = - vpadd_s32(vget_low_s32(row_accum3), vget_high_s32(row_accum3)); - const int32x2_t reduced_lo = - vpadd_s32(pairwise_reduced_acc_0, pairwise_reduced_acc_1); - const int32x2_t reduced_hi = - vpadd_s32(pairwise_reduced_acc_2, pairwise_reduced_acc_3); - int32x4_t reduced = vcombine_s32(reduced_lo, reduced_hi); - // Add bias values. - int32x4_t bias_vec = vld1q_s32(bias_data + c); - reduced = vaddq_s32(reduced, bias_vec); - reduced = vshlq_s32(reduced, vdupq_n_s32(left_shift)); - // Multiply by the fixed-point multiplier. - reduced = vqrdmulhq_n_s32(reduced, output_multiplier); - // Rounding-shift-right. - using gemmlowp::RoundingDivideByPOT; - reduced = RoundingDivideByPOT(reduced, right_shift); - // Narrow values down to 16 bit signed. - const int16x4_t res16 = vqmovn_s32(reduced); - vst1_s16(output_data + c, res16); + } else if (batches == 4) { + const int right_shift = output_shift > 0 ? output_shift : 0; + const int left_shift = output_shift > 0 ? 0 : -output_shift; + for (int c = 0; c < output_depth; c += 4) { + const int8* shuffled_input_ptr = + reinterpret_cast(shuffled_input_workspace_data); + // Accumulation loop. 
+ int32x4_t row_accum00 = vdupq_n_s32(0); + int32x4_t row_accum10 = vdupq_n_s32(0); + int32x4_t row_accum20 = vdupq_n_s32(0); + int32x4_t row_accum30 = vdupq_n_s32(0); + int32x4_t row_accum01 = vdupq_n_s32(0); + int32x4_t row_accum11 = vdupq_n_s32(0); + int32x4_t row_accum21 = vdupq_n_s32(0); + int32x4_t row_accum31 = vdupq_n_s32(0); + int32x4_t row_accum02 = vdupq_n_s32(0); + int32x4_t row_accum12 = vdupq_n_s32(0); + int32x4_t row_accum22 = vdupq_n_s32(0); + int32x4_t row_accum32 = vdupq_n_s32(0); + int32x4_t row_accum03 = vdupq_n_s32(0); + int32x4_t row_accum13 = vdupq_n_s32(0); + int32x4_t row_accum23 = vdupq_n_s32(0); + int32x4_t row_accum33 = vdupq_n_s32(0); + for (int d = 0; d < accum_depth; d += 16) { + int8x16_t weights0 = vld1q_s8(shuffled_weights_ptr + 0); + int8x16_t weights1 = vld1q_s8(shuffled_weights_ptr + 16); + int8x16_t weights2 = vld1q_s8(shuffled_weights_ptr + 32); + int8x16_t weights3 = vld1q_s8(shuffled_weights_ptr + 48); + shuffled_weights_ptr += 64; + int8x16_t input0 = vld1q_s8(shuffled_input_ptr + 0); + int8x16_t input1 = vld1q_s8(shuffled_input_ptr + 16); + int8x16_t input2 = vld1q_s8(shuffled_input_ptr + 32); + int8x16_t input3 = vld1q_s8(shuffled_input_ptr + 48); + shuffled_input_ptr += 64; + int16x8_t local_accum0, local_accum1, local_accum2, local_accum3; +#define TFLITE_SHUFFLED_FC_ACCUM(B) \ + local_accum0 = vmull_s8(vget_low_s8(weights0), vget_low_s8(input##B)); \ + local_accum1 = vmull_s8(vget_low_s8(weights1), vget_low_s8(input##B)); \ + local_accum2 = vmull_s8(vget_low_s8(weights2), vget_low_s8(input##B)); \ + local_accum3 = vmull_s8(vget_low_s8(weights3), vget_low_s8(input##B)); \ + local_accum0 = \ + vmlal_s8(local_accum0, vget_high_s8(weights0), vget_high_s8(input##B)); \ + local_accum1 = \ + vmlal_s8(local_accum1, vget_high_s8(weights1), vget_high_s8(input##B)); \ + local_accum2 = \ + vmlal_s8(local_accum2, vget_high_s8(weights2), vget_high_s8(input##B)); \ + local_accum3 = \ + vmlal_s8(local_accum3, vget_high_s8(weights3), 
vget_high_s8(input##B)); \ + row_accum0##B = vpadalq_s16(row_accum0##B, local_accum0); \ + row_accum1##B = vpadalq_s16(row_accum1##B, local_accum1); \ + row_accum2##B = vpadalq_s16(row_accum2##B, local_accum2); \ + row_accum3##B = vpadalq_s16(row_accum3##B, local_accum3); + + TFLITE_SHUFFLED_FC_ACCUM(0) + TFLITE_SHUFFLED_FC_ACCUM(1) + TFLITE_SHUFFLED_FC_ACCUM(2) + TFLITE_SHUFFLED_FC_ACCUM(3) + +#undef TFLITE_SHUFFLED_FC_ACCUM + } + // Horizontally reduce accumulators + +#define TFLITE_SHUFFLED_FC_STORE(B) \ + { \ + int32x2_t pairwise_reduced_acc_0, pairwise_reduced_acc_1, \ + pairwise_reduced_acc_2, pairwise_reduced_acc_3; \ + pairwise_reduced_acc_0 = \ + vpadd_s32(vget_low_s32(row_accum0##B), vget_high_s32(row_accum0##B)); \ + pairwise_reduced_acc_1 = \ + vpadd_s32(vget_low_s32(row_accum1##B), vget_high_s32(row_accum1##B)); \ + pairwise_reduced_acc_2 = \ + vpadd_s32(vget_low_s32(row_accum2##B), vget_high_s32(row_accum2##B)); \ + pairwise_reduced_acc_3 = \ + vpadd_s32(vget_low_s32(row_accum3##B), vget_high_s32(row_accum3##B)); \ + const int32x2_t reduced_lo = \ + vpadd_s32(pairwise_reduced_acc_0, pairwise_reduced_acc_1); \ + const int32x2_t reduced_hi = \ + vpadd_s32(pairwise_reduced_acc_2, pairwise_reduced_acc_3); \ + int32x4_t reduced = vcombine_s32(reduced_lo, reduced_hi); \ + int32x4_t bias_vec = vld1q_s32(bias_data + c); \ + reduced = vaddq_s32(reduced, bias_vec); \ + reduced = vshlq_s32(reduced, vdupq_n_s32(left_shift)); \ + reduced = vqrdmulhq_n_s32(reduced, output_multiplier); \ + using gemmlowp::RoundingDivideByPOT; \ + reduced = RoundingDivideByPOT(reduced, right_shift); \ + const int16x4_t res16 = vqmovn_s32(reduced); \ + vst1_s16(output_data + c + B * output_stride, res16); \ + } + + TFLITE_SHUFFLED_FC_STORE(0); + TFLITE_SHUFFLED_FC_STORE(1); + TFLITE_SHUFFLED_FC_STORE(2); + TFLITE_SHUFFLED_FC_STORE(3); + +#undef TFLITE_SHUFFLED_FC_STORE + } + } else { + TFLITE_DCHECK(false); + return; } #else - for (int c = 0; c < output_depth; c += 4) { - // Internal 
accumulation. - // Initialize accumulator with the bias-value. - int32 accum[4] = {0}; - // Accumulation loop. - for (int d = 0; d < accum_depth; d += 16) { - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 16; j++) { - int8 input_val = input_data[d + j] - 128; - int8 weights_val = *shuffled_weights_ptr++; - accum[i] += weights_val * input_val; + if (batches == 1) { + int16* output_ptr = output_data; + // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) + // so that just reinterpreting them as int8 values is equivalent to + // subtracting 128 from them, thus implementing for free the subtraction of + // the zero_point value 128. + const int8* shuffled_weights_ptr = + reinterpret_cast(shuffled_weights_data); + // Likewise, we preshuffled and pre-xored the input data above. + const int8* shuffled_input_data = + reinterpret_cast(shuffled_input_workspace_data); + for (int c = 0; c < output_depth; c += 4) { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum[4] = {0}; + // Accumulation loop. + for (int d = 0; d < accum_depth; d += 16) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 16; j++) { + int8 input_val = shuffled_input_data[d + j]; + int8 weights_val = *shuffled_weights_ptr++; + accum[i] += weights_val * input_val; + } } } + for (int i = 0; i < 4; i++) { + // Add bias value + int acc = accum[i] + bias_data[c + i]; + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, + -output_shift); + // Saturate, cast to int16, and store to output array. 
+ acc = std::max(acc, -32768); + acc = std::min(acc, 32767); + output_ptr[c + i] = acc; + } } - for (int i = 0; i < 4; i++) { - // Add bias value - int acc = accum[i] + bias_data[c + i]; - // Down-scale the final int32 accumulator to the scale used by our - // (16-bit, typically 3 integer bits) fixed-point format. The quantized - // multiplier and shift here have been pre-computed offline - // (e.g. by toco). - acc = - MultiplyByQuantizedMultiplier(acc, output_multiplier, -output_shift); - // Saturate, cast to int16, and store to output array. - acc = std::max(acc, -32768); - acc = std::min(acc, 32767); - output_data[c + i] = acc; + } else if (batches == 4) { + int16* output_ptr = output_data; + // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) + // so that just reinterpreting them as int8 values is equivalent to + // subtracting 128 from them, thus implementing for free the subtraction of + // the zero_point value 128. + const int8* shuffled_weights_ptr = + reinterpret_cast(shuffled_weights_data); + // Likewise, we preshuffled and pre-xored the input data above. + const int8* shuffled_input_data = + reinterpret_cast(shuffled_input_workspace_data); + for (int c = 0; c < output_depth; c += 4) { + const int8* shuffled_input_ptr = shuffled_input_data; + // Accumulation loop. + // Internal accumulation. + // Initialize accumulator with the bias-value. 
+ int32 accum[4][4]; + for (int i = 0; i < 4; i++) { + for (int b = 0; b < 4; b++) { + accum[i][b] = 0; + } + } + for (int d = 0; d < accum_depth; d += 16) { + for (int i = 0; i < 4; i++) { + for (int b = 0; b < 4; b++) { + for (int j = 0; j < 16; j++) { + int8 input_val = shuffled_input_ptr[16 * b + j]; + int8 weights_val = shuffled_weights_ptr[16 * i + j]; + accum[i][b] += weights_val * input_val; + } + } + } + shuffled_input_ptr += 64; + shuffled_weights_ptr += 64; + } + for (int i = 0; i < 4; i++) { + for (int b = 0; b < 4; b++) { + // Add bias value + int acc = accum[i][b] + bias_data[c + i]; + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The + // quantized multiplier and shift here have been pre-computed offline + // (e.g. by toco). + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, + -output_shift); + // Saturate, cast to int16, and store to output array. + acc = std::max(acc, -32768); + acc = std::min(acc, 32767); + output_ptr[b * output_stride + c + i] = acc; + } + } } + } else { + TFLITE_DCHECK(false); + return; } #endif } @@ -1320,12 +1486,15 @@ inline void ExperimentalShuffledFullyConnectedWorkerImpl( // to allow using gemmlowp's threadpool. 
struct ExperimentalShuffledFullyConnectedWorkerTask : gemmlowp::Task { ExperimentalShuffledFullyConnectedWorkerTask( - const uint8* input_data, const int8* shuffled_weights_data, - int output_depth, int accum_depth, const int32* bias_data, - int32 output_multiplier, int output_shift, int16* output_data) + const uint8* input_data, const int8* shuffled_weights_data, int batches, + int output_depth, int output_stride, int accum_depth, + const int32* bias_data, int32 output_multiplier, int output_shift, + int16* output_data) : input_data_(input_data), shuffled_weights_data_(shuffled_weights_data), + batches_(batches), output_depth_(output_depth), + output_stride_(output_stride), accum_depth_(accum_depth), bias_data_(bias_data), output_multiplier_(output_multiplier), @@ -1334,13 +1503,16 @@ struct ExperimentalShuffledFullyConnectedWorkerTask : gemmlowp::Task { void Run() override { ExperimentalShuffledFullyConnectedWorkerImpl( - input_data_, shuffled_weights_data_, output_depth_, accum_depth_, - bias_data_, output_multiplier_, output_shift_, output_data_); + input_data_, shuffled_weights_data_, batches_, output_depth_, + output_stride_, accum_depth_, bias_data_, output_multiplier_, + output_shift_, output_data_); } const uint8* input_data_; const int8* shuffled_weights_data_; + int batches_; int output_depth_; + int output_stride_; int accum_depth_; const int32* bias_data_; int32 output_multiplier_; @@ -1354,7 +1526,7 @@ inline void ExperimentalShuffledFullyConnected( const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, int output_shift, int32 output_activation_min, int32 output_activation_max, int16* output_data, const Dims<4>& output_dims, - gemmlowp::GemmContext* gemm_context) { + uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) { gemmlowp::ScopedProfilingLabel label( "ExperimentalShuffledFullyConnected/8bit"); (void)gemm_context; // only used in optimized code. 
@@ -1371,10 +1543,8 @@ inline void ExperimentalShuffledFullyConnected( const int accum_depth = ArraySize(weights_dims, 0); TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); - // The experimental shuffling is an optimization for matrix*vector product. - // We aren't interested in supporting non-matrix*vector-product cases, i.e. - // batches>1. - TFLITE_DCHECK_EQ(batches, 1); + TFLITE_DCHECK((accum_depth % 16) == 0); + TFLITE_DCHECK((output_depth % 4) == 0); // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) // so that just reinterpreting them as int8 values is equivalent to // subtracting 128 from them, thus implementing for free the subtraction of @@ -1382,18 +1552,71 @@ inline void ExperimentalShuffledFullyConnected( const int8* int8_shuffled_weights_data = reinterpret_cast(shuffled_weights_data); - // Our GEMV kernel has 4 rows. This doesn't matter in practice for GEMV - // shapes, gemmlowp::HowManyThreads only takes that parameter because it - // matters for other kinds of GEMM shapes. 
+ // Shuffling and xoring of input activations into the workspace buffer + if (batches == 1) { +#ifdef USE_NEON + const uint8x16_t signbit = vdupq_n_u8(0x80); + for (int i = 0; i < accum_depth; i += 16) { + uint8x16_t val = vld1q_u8(input_data + i); + val = veorq_u8(val, signbit); + vst1q_u8(shuffled_input_workspace_data + i, val); + } +#else + for (int i = 0; i < accum_depth; i++) { + shuffled_input_workspace_data[i] = input_data[i] ^ 0x80; + } +#endif + } else if (batches == 4) { + uint8* shuffled_input_workspace_ptr = shuffled_input_workspace_data; + int c = 0; +#ifdef USE_NEON + const uint8x16_t signbit = vdupq_n_u8(0x80); + for (c = 0; c < accum_depth; c += 16) { + const uint8* src_data_ptr = input_data + c; + uint8x16_t val0 = vld1q_u8(src_data_ptr + 0 * accum_depth); + uint8x16_t val1 = vld1q_u8(src_data_ptr + 1 * accum_depth); + uint8x16_t val2 = vld1q_u8(src_data_ptr + 2 * accum_depth); + uint8x16_t val3 = vld1q_u8(src_data_ptr + 3 * accum_depth); + val0 = veorq_u8(val0, signbit); + val1 = veorq_u8(val1, signbit); + val2 = veorq_u8(val2, signbit); + val3 = veorq_u8(val3, signbit); + vst1q_u8(shuffled_input_workspace_ptr + 0, val0); + vst1q_u8(shuffled_input_workspace_ptr + 16, val1); + vst1q_u8(shuffled_input_workspace_ptr + 32, val2); + vst1q_u8(shuffled_input_workspace_ptr + 48, val3); + shuffled_input_workspace_ptr += 64; + } +#else + for (c = 0; c < accum_depth; c += 16) { + for (int b = 0; b < 4; b++) { + const uint8* src_data_ptr = input_data + b * accum_depth + c; + for (int j = 0; j < 16; j++) { + uint8 src_val = *src_data_ptr++; + // Flip the sign bit, so that the kernel will only need to + // reinterpret these uint8 values as int8, getting for free the + // subtraction of the zero_point value 128. 
+ uint8 dst_val = src_val ^ 0x80; + *shuffled_input_workspace_ptr++ = dst_val; + } + } + } +#endif + } else { + TFLITE_DCHECK(false); + return; + } + static constexpr int kKernelRows = 4; const int thread_count = gemmlowp::HowManyThreads( - gemm_context->max_num_threads(), output_depth, 1, accum_depth); + gemm_context->max_num_threads(), output_depth, batches, accum_depth); if (thread_count == 1) { // Single-thread case: do the computation on the current thread, don't // use a threadpool ExperimentalShuffledFullyConnectedWorkerImpl( - input_data, int8_shuffled_weights_data, output_depth, accum_depth, - bias_data, output_multiplier, output_shift, output_data); + shuffled_input_workspace_data, int8_shuffled_weights_data, batches, + output_depth, output_depth, accum_depth, bias_data, output_multiplier, + output_shift, output_data); return; } @@ -1406,8 +1629,9 @@ inline void ExperimentalShuffledFullyConnected( for (int i = 0; i < thread_count; i++) { int row_end = std::min(output_depth, row_start + kRowsPerWorker); tasks[i] = new ExperimentalShuffledFullyConnectedWorkerTask( - input_data, int8_shuffled_weights_data + row_start * accum_depth, - row_end - row_start, accum_depth, bias_data + row_start, + shuffled_input_workspace_data, + int8_shuffled_weights_data + row_start * accum_depth, batches, + row_end - row_start, output_depth, accum_depth, bias_data + row_start, output_multiplier, output_shift, output_data + row_start); row_start = row_end; } diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 49a93b0c6d..d1d4f54f86 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -608,8 +608,9 @@ inline void ExperimentalShuffledFullyConnected( const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, int output_shift, int32 output_activation_min, int32 
output_activation_max, int16* output_data, const Dims<4>& output_dims, - gemmlowp::GemmContext* gemm_context) { + uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) { (void)gemm_context; // only used in optimized code. + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); // TODO(benoitjacob): This really should be: // const int batches = ArraySize(output_dims, 1); @@ -622,44 +623,130 @@ inline void ExperimentalShuffledFullyConnected( const int accum_depth = ArraySize(weights_dims, 0); TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); - // The experimental shuffling is an optimization for matrix*vector product. - // We aren't interested in supporting non-matrix*vector-product cases, i.e. - // batches>1. - TFLITE_DCHECK_EQ(batches, 1); - // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) - // so that just reinterpreting them as int8 values is equivalent to - // subtracting 128 from them, thus implementing for free the subtraction of - // the zero_point value 128. - const int8* shuffled_weights_ptr = - reinterpret_cast(shuffled_weights_data); - for (int c = 0; c < output_depth; c += 4) { - // Internal accumulation. - // Initialize accumulator with the bias-value. - int32 accum[4] = {0}; - // Accumulation loop. 
- for (int d = 0; d < accum_depth; d += 16) { - for (int i = 0; i < 4; i++) { + TFLITE_DCHECK((accum_depth % 16) == 0); + TFLITE_DCHECK((output_depth % 4) == 0); + + // Shuffling and xoring of input activations into the workspace buffer + uint8* shuffled_input_workspace_ptr = shuffled_input_workspace_data; + if (batches == 1) { + for (int i = 0; i < accum_depth; i++) { + shuffled_input_workspace_data[i] = input_data[i] ^ 0x80; + } + } else if (batches == 4) { + for (int c = 0; c < accum_depth; c += 16) { + for (int b = 0; b < 4; b++) { + const uint8* src_data_ptr = input_data + b * accum_depth + c; for (int j = 0; j < 16; j++) { - int8 input_val = input_data[d + j] - 128; - int8 weights_val = *shuffled_weights_ptr++; - accum[i] += weights_val * input_val; + uint8 src_val = *src_data_ptr++; + // Flip the sign bit, so that the kernel will only need to + // reinterpret these uint8 values as int8, getting for free the + // subtraction of the zero_point value 128. + uint8 dst_val = src_val ^ 0x80; + *shuffled_input_workspace_ptr++ = dst_val; } } } - for (int i = 0; i < 4; i++) { - // Add bias value - int acc = accum[i] + bias_data[c + i]; - // Down-scale the final int32 accumulator to the scale used by our - // (16-bit, typically 3 integer bits) fixed-point format. The quantized - // multiplier and shift here have been pre-computed offline - // (e.g. by toco). - acc = - MultiplyByQuantizedMultiplier(acc, output_multiplier, -output_shift); - // Saturate, cast to int16, and store to output array. 
- acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[c + i] = acc; + } else { + TFLITE_DCHECK(false); + return; + } + + // Actual computation + if (batches == 1) { + int16* output_ptr = output_data; + // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) + // so that just reinterpreting them as int8 values is equivalent to + // subtracting 128 from them, thus implementing for free the subtraction of + // the zero_point value 128. + const int8* shuffled_weights_ptr = + reinterpret_cast(shuffled_weights_data); + // Likewise, we preshuffled and pre-xored the input data above. + const int8* shuffled_input_data = + reinterpret_cast(shuffled_input_workspace_data); + for (int c = 0; c < output_depth; c += 4) { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum[4] = {0}; + // Accumulation loop. + for (int d = 0; d < accum_depth; d += 16) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 16; j++) { + int8 input_val = shuffled_input_data[d + j]; + int8 weights_val = *shuffled_weights_ptr++; + accum[i] += weights_val * input_val; + } + } + } + for (int i = 0; i < 4; i++) { + // Add bias value + int acc = accum[i] + bias_data[c + i]; + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, + -output_shift); + // Saturate, cast to int16, and store to output array. 
+ acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_ptr[c + i] = acc; + } + } + } else if (batches == 4) { + int16* output_ptr = output_data; + // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) + // so that just reinterpreting them as int8 values is equivalent to + // subtracting 128 from them, thus implementing for free the subtraction of + // the zero_point value 128. + const int8* shuffled_weights_ptr = + reinterpret_cast(shuffled_weights_data); + // Likewise, we preshuffled and pre-xored the input data above. + const int8* shuffled_input_data = + reinterpret_cast(shuffled_input_workspace_data); + for (int c = 0; c < output_depth; c += 4) { + const int8* shuffled_input_ptr = shuffled_input_data; + // Accumulation loop. + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum[4][4]; + for (int i = 0; i < 4; i++) { + for (int b = 0; b < 4; b++) { + accum[i][b] = 0; + } + } + for (int d = 0; d < accum_depth; d += 16) { + for (int i = 0; i < 4; i++) { + for (int b = 0; b < 4; b++) { + for (int j = 0; j < 16; j++) { + int8 input_val = shuffled_input_ptr[16 * b + j]; + int8 weights_val = shuffled_weights_ptr[16 * i + j]; + accum[i][b] += weights_val * input_val; + } + } + } + shuffled_input_ptr += 64; + shuffled_weights_ptr += 64; + } + for (int i = 0; i < 4; i++) { + for (int b = 0; b < 4; b++) { + // Add bias value + int acc = accum[i][b] + bias_data[c + i]; + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The + // quantized multiplier and shift here have been pre-computed offline + // (e.g. by toco). + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, + -output_shift); + // Saturate, cast to int16, and store to output array. 
+ acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_ptr[b * output_depth + c + i] = acc; + } + } } + } else { + TFLITE_DCHECK(false); + return; } } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/experimental_shuffle_fc_weights.cc b/tensorflow/contrib/lite/toco/graph_transformations/experimental_shuffle_fc_weights.cc index f098981a5c..c00cdcb944 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/experimental_shuffle_fc_weights.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/experimental_shuffle_fc_weights.cc @@ -55,17 +55,26 @@ bool ExperimentalShuffleFCWeights::Run(Model* model, std::size_t op_index) { // Exit if, based on the known shapes, this FC op is not a GEMV. // The shuffling of FC weights is only useful to enable fast GEMV paths. const Shape& input_shape = input_array.shape(); - for (int i = 0; i < input_shape.dimensions_count() - 1; i++) { + for (int i = 1; i < input_shape.dimensions_count() - 1; i++) { if (input_shape.dims(i) != 1) { // The input activations, shaped as a matrix, have multiple columns. // This FC op isn't a matrix*vector multiplication. AddMessageF( "Not applying experimental shuffling to the weights of %s because " - "it's not a matrix*vector product", + "the input shape is not 1D or 2D (possibly with additional inner " + "dimensions of size 1)", LogName(*op)); return false; } } + if (input_shape.dims(0) != 1 && input_shape.dims(0) != 4) { + AddMessageF( + "Not applying experimental shuffling to the weights of %s because " + "the input shape's leading dimension, i.e. the 'batch size', is not " + "equal to 1 or 4", + LogName(*op)); + return false; + } // Exit if the weights shape isn't an integral multiple of the shuffled // block shape, 4x16. 
We don't want to have to write code dealing with // odd sizes, that would go un-exercised at the moment as the models @@ -129,6 +138,20 @@ bool ExperimentalShuffleFCWeights::Run(Model* model, std::size_t op_index) { fc_op->experimental_shuffled_weights = true; AddMessageF("Applied experimental shuffling to the weights of %s", LogName(*op)); + // Add a second output array to this FC op, serving as a workspace to perform + // runtime shuffling/xoring of its input activations. + CHECK_EQ(fc_op->outputs.size(), 1); + const string& shuffled_input_workspace_array_name = + AvailableArrayName(*model, fc_op->inputs[0] + "_shuffled"); + fc_op->outputs.push_back(shuffled_input_workspace_array_name); + auto& shuffled_input_workspace_array = + model->GetOrCreateArray(shuffled_input_workspace_array_name); + shuffled_input_workspace_array.data_type = input_array.data_type; + *shuffled_input_workspace_array.mutable_shape() = input_array.shape(); + shuffled_input_workspace_array.GetOrCreateMinMax() = input_array.GetMinMax(); + shuffled_input_workspace_array.GetOrCreateQuantizationParams() = + input_array.GetQuantizationParams(); + return true; } diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index cf2cbeedc7..5a341294db 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1405,20 +1405,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { } input_minmax.min = (qmin - mean_value) / std_value; input_minmax.max = (qmax - mean_value) / std_value; - if (input_array.minmax) { - if (input_array_proto.has_mean_value() || - input_array_proto.has_std_value()) { - const double width = input_minmax.max - input_minmax.min; - const double kMinMaxAllowedDiff = 1e-6 * width; - CHECK(std::abs(input_minmax.min - input_array.minmax->min) < - kMinMaxAllowedDiff && - std::abs(input_minmax.max - input_array.minmax->max) < - kMinMaxAllowedDiff) - << input_minmax.min 
<< ", " << input_minmax.max - << " != " << input_array.minmax->min << ", " - << input_array.minmax->max; - } - } else { + if (!input_array.minmax) { input_array.GetOrCreateMinMax() = input_minmax; } } -- GitLab From 89ff74a7b25c01a511e84a805d3b2edf780142a6 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 23 Apr 2018 12:03:19 -0700 Subject: [PATCH 152/434] [XLA] Disallow conversion from StatusOr to StatusOr if T is not convertible to U. PiperOrigin-RevId: 193962287 --- tensorflow/compiler/xla/statusor.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/statusor.h b/tensorflow/compiler/xla/statusor.h index 641b5e9a6a..cccbce5fc8 100644 --- a/tensorflow/compiler/xla/statusor.h +++ b/tensorflow/compiler/xla/statusor.h @@ -113,17 +113,19 @@ class StatusOr : private internal_statusor::StatusOrData, StatusOr& operator=(StatusOr&&) = default; // Conversion copy/move constructor, T must be convertible from U. - // TODO(b/62186717): These should not participate in overload resolution if U - // is not convertible to T. - template + template ::value>::type* = nullptr> StatusOr(const StatusOr& other); - template + template ::value>::type* = nullptr> StatusOr(StatusOr&& other); // Conversion copy/move assignment operator, T must be convertible from U. - template + template ::value>::type* = nullptr> StatusOr& operator=(const StatusOr& other); - template + template ::value>::type* = nullptr> StatusOr& operator=(StatusOr&& other); // Constructs a new StatusOr with the given value. 
After calling this @@ -233,12 +235,14 @@ StatusOr& StatusOr::operator=(Status&& status) { } template -template +template ::value>::type*> inline StatusOr::StatusOr(const StatusOr& other) : Base(static_cast::Base&>(other)) {} template -template +template ::value>::type*> inline StatusOr& StatusOr::operator=(const StatusOr& other) { if (other.ok()) this->Assign(other.ValueOrDie()); @@ -248,12 +252,14 @@ inline StatusOr& StatusOr::operator=(const StatusOr& other) { } template -template +template ::value>::type*> inline StatusOr::StatusOr(StatusOr&& other) : Base(static_cast::Base&&>(other)) {} template -template +template ::value>::type*> inline StatusOr& StatusOr::operator=(StatusOr&& other) { if (other.ok()) { this->Assign(std::move(other).ValueOrDie()); -- GitLab From 4adc560844c4d769efdaeb5b67d5ace1e0df7b16 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 12:21:29 -0700 Subject: [PATCH 153/434] Rewrite tail recursion in loop optimizer as loop to avoid stack overflow. 
PiperOrigin-RevId: 193965038 --- .../grappler/optimizers/loop_optimizer.cc | 70 +++++++++++-------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index fff06dd2ac..f7994221bb 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -320,42 +320,50 @@ Status LoopInvariantNodeMotionOptimizer::RevertInvariantNodes() { return Status::OK(); } -Status LoopInvariantNodeMotionOptimizer::FindInvariantNodes(NodeDef* node) { - auto consumers = node_map_->GetOutputs(node->name()); - invariant_nodes_.insert(std::make_pair(node, consumers.size())); - for (auto* consumer : consumers) { - if (invariant_nodes_.count(consumer) || ModifiesFrameInfo(*consumer)) { - continue; - } - bool is_invariant = true; - for (const auto& input : consumer->input()) { - if (!IsControlInput(input)) { - const string name = NodeName(input); - auto* producer = node_map_->GetNode(name); - if (!invariant_nodes_.count(producer)) { - if (IsConstant(*producer)) { - invariant_nodes_.insert( - std::make_pair(producer, node_map_->GetOutputs(name).size())); - } else { - is_invariant = false; - break; - } - } +Status LoopInvariantNodeMotionOptimizer::FindInvariantNodes( + NodeDef* start_node) { + std::vector stack; + stack.reserve(32); + stack.push_back(start_node); + while (!stack.empty()) { + NodeDef* node = stack.back(); + stack.pop_back(); + auto consumers = node_map_->GetOutputs(node->name()); + invariant_nodes_.emplace(node, consumers.size()); + for (auto* consumer : consumers) { + if (invariant_nodes_.count(consumer) || ModifiesFrameInfo(*consumer)) { + continue; } - } - if (is_invariant) { - std::set producers; + bool is_invariant = true; for (const auto& input : consumer->input()) { - auto* producer = node_map_->GetNode(input); - producers.insert(producer); + if (!IsControlInput(input)) { + const string name 
= NodeName(input); + auto* producer = node_map_->GetNode(name); + if (!invariant_nodes_.count(producer)) { + if (IsConstant(*producer)) { + invariant_nodes_.insert( + std::make_pair(producer, node_map_->GetOutputs(name).size())); + } else { + is_invariant = false; + break; + } + } + } } - for (auto* producer : producers) { - auto iter = invariant_nodes_.find(producer); - if (iter != invariant_nodes_.end()) { - --iter->second; + if (is_invariant) { + std::set producers; + for (const auto& input : consumer->input()) { + auto* producer = node_map_->GetNode(input); + producers.insert(producer); + } + for (auto* producer : producers) { + auto iter = invariant_nodes_.find(producer); + if (iter != invariant_nodes_.end()) { + --iter->second; + } } + stack.push_back(consumer); } - TF_RETURN_IF_ERROR(FindInvariantNodes(consumer)); } } return Status::OK(); -- GitLab From 7de04c4cd9fb6a38b1b34d02fed14c89057bf002 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 23 Apr 2018 12:21:57 -0700 Subject: [PATCH 154/434] Add TensorFlow format support to tf.keras.Model.save_weights and load_weights Supports restore-on-create in subclassed Models when executing eagerly, and removes the requirement that the Model be built before weights are loaded. Currently only subclassed Models work with the TensorFlow weight format. Graph networks will need a bit of extra logic to support the same topology/by-name distinction as the current HDF5 format (and for now they don't even add Checkpointable dependencies on their sub-layers). Some notes: - Checkpoints won't be numbered. This keeps behavior the same as for existing HDF5 weight saving. - All dependencies will be saved for subclassed Models, not just layers. This will make it more useful for training checkpoints (you can assign an optimizer to an attribute and save the slot variables that way). - Subclassed models won't support loading by flattened weight list from the TensorFlow format. 
Since there's no global naming for Layers (it's local to the Model), I think this is reasonable. PiperOrigin-RevId: 193965120 --- .../keras/_impl/keras/engine/base_layer.py | 9 + .../keras/_impl/keras/engine/network.py | 208 +++++++++++++--- .../keras/_impl/keras/engine/saving_test.py | 227 +++++++++++++++++- .../keras/_impl/keras/engine/training.py | 3 + .../_impl/keras/model_subclassing_test.py | 29 ++- .../python/training/checkpointable_utils.py | 12 +- .../api/golden/tensorflow.keras.-model.pbtxt | 2 +- .../golden/tensorflow.keras.-sequential.pbtxt | 2 +- .../tensorflow.keras.models.-model.pbtxt | 2 +- .../tensorflow.keras.models.-sequential.pbtxt | 2 +- tensorflow/tools/ci_build/ci_sanity.sh | 1 + 11 files changed, 438 insertions(+), 59 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/engine/base_layer.py b/tensorflow/python/keras/_impl/keras/engine/base_layer.py index 6c68d25127..abae6c3785 100644 --- a/tensorflow/python/keras/_impl/keras/engine/base_layer.py +++ b/tensorflow/python/keras/_impl/keras/engine/base_layer.py @@ -726,8 +726,17 @@ class Layer(checkpointable.CheckpointableBase): if hasattr(self, '_initial_weights') and self._initial_weights is not None: self.set_weights(self._initial_weights) del self._initial_weights + self._post_build_cleanup() return outputs + def _post_build_cleanup(self): + """Hooks to run after all sub-Layers are built.""" + # Note that in addition to Layer.__call__, this method is called by Model + # after building a graph network (which skips __call__). It should be called + # when possible if self.built may have switched from False to True, and is + # idempotent. + pass # No-op for Layers which don't override this method. + def apply(self, inputs, *args, **kwargs): """Apply the layer on a input. 
diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index 3b419dff3a..4127c781eb 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -22,11 +22,14 @@ from __future__ import print_function import copy import json import os +import weakref import numpy as np from six.moves import zip # pylint: disable=redefined-builtin +from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import backend as K @@ -37,6 +40,7 @@ from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_wi from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary as print_layer_summary from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import checkpointable +from tensorflow.python.training import checkpointable_utils from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect @@ -114,6 +118,13 @@ class Network(base_layer.Layer): self._outbound_nodes = [] self._inbound_nodes = [] + self._checkpointable_saver = checkpointable_utils.CheckpointableSaver( + weakref.ref(self)) + # A zero-argument function which should be called and set back to None as + # soon as the network is built (only applicable to subclassed Models). Runs + # restore operations when graph building. + self._in_progress_restore_finalizer = None + def _init_graph_network(self, inputs, outputs, name=None): self._uses_inputs_arg = True # Normalize and set self.inputs, self.outputs. 
@@ -1125,62 +1136,179 @@ class Network(base_layer.Layer): from tensorflow.python.keras._impl.keras.models import save_model # pylint: disable=g-import-not-at-top save_model(self, filepath, overwrite, include_optimizer) - def save_weights(self, filepath, overwrite=True): - """Dumps all layer weights to a HDF5 file. + def save_weights(self, filepath, overwrite=True, save_format=None): + """Saves all layer weights. + + Either saves in HDF5 or in TensorFlow format based on the `save_format` + argument. + + When saving in HDF5 format, the weight file has: + - `layer_names` (attribute), a list of strings + (ordered names of model layers). + - For every layer, a `group` named `layer.name` + - For every such layer group, a group attribute `weight_names`, + a list of strings + (ordered names of weights tensor of the layer). + - For every weight in the layer, a dataset + storing the weight value, named after the weight tensor. - The weight file has: - - `layer_names` (attribute), a list of strings - (ordered names of model layers). - - For every layer, a `group` named `layer.name` - - For every such layer group, a group attribute `weight_names`, - a list of strings - (ordered names of weights tensor of the layer). - - For every weight in the layer, a dataset - storing the weight value, named after the weight tensor. + Currently the TensorFlow format is only supported for user-defined classes + inheriting from `tf.keras.Model`, and not for networks constructed from + inputs and outputs (using `tf.keras.Model(inputs, outputs)`). + + When saving in TensorFlow format, all objects referenced by the network are + saved in the same format as `tf.train.Checkpoint`, including any `Layer`s or + `Optimizer`s assigned to attributes in the constructor. See + `tf.train.Checkpoint`'s documentation for details. Arguments: - filepath: String, path to the file to save the weights to. + filepath: String, path to the file to save the weights to. 
When saving + in TensorFlow format, this is the prefix used for checkpoint files + (multiple files are generated). Note that the '.h5' suffix causes + weights to be saved in HDF5 format. overwrite: Whether to silently overwrite any existing file at the target location, or provide the user with a manual prompt. + save_format: Either 'tf' or 'h5'. If `None`, defaults to 'tf' for + user-defined classes inheriting from `tf.keras.Model` and 'h5' for + networks constructed from inputs and outputs. `filepath`s ending in + '.h5' or '.keras' always default to HDF5. Currently only 'h5' is + supported for networks constructed from inputs and outputs. Once + supported, the default for all networks will switch to 'tf'. Raises: - ImportError: If h5py is not available. + ImportError: If h5py is not available when attempting to save in HDF5 + format. + ValueError: For invalid/unknown format arguments. """ - if h5py is None: - raise ImportError('`save_weights` requires h5py.') + filepath_is_h5 = filepath.endswith('.h5') or filepath.endswith('.keras') + if save_format is None: + if filepath_is_h5: + save_format = 'h5' + else: + if self._is_graph_network: + # TODO(allenl): Handle loading by weight index and fix dependencies, + # then enable 'tensorflow' format by default for graph networks. + save_format = 'h5' + else: + # Subclassed models save in TensorFlow format by default. + save_format = 'tf' + else: + user_format = save_format.lower().strip() + if user_format in ('tensorflow', 'tf'): + save_format = 'tf' + elif user_format in ('hdf5', 'h5', 'keras'): + save_format = 'h5' + else: + raise ValueError( + 'Unknown format "%s". Was expecting one of {"tf", "h5"}.' % ( + save_format,)) + if save_format == 'tf' and filepath_is_h5: + raise ValueError( + ('save_weights got save_format="tf"/"tensorflow", but the ' + 'filepath ("%s") looks like an HDF5 file. 
Omit the ".h5"/".keras" ' + 'when saving in TensorFlow format.') + % filepath) + + if save_format == 'h5' and h5py is None: + raise ImportError( + '`save_weights` requires h5py when saving in hdf5.') + if save_format == 'tf': + if self._is_graph_network: + raise NotImplementedError( + 'Networks constructed from inputs and outputs do not yet support ' + 'saving weights in the TensorFlow ("tf") save_format.') + check_filepath = filepath + '.index' + else: + check_filepath = filepath # If file exists and should not be overwritten: - if not overwrite and os.path.isfile(filepath): - proceed = ask_to_proceed_with_overwrite(filepath) + if not overwrite and os.path.isfile(check_filepath): + proceed = ask_to_proceed_with_overwrite(check_filepath) if not proceed: return - with h5py.File(filepath, 'w') as f: - saving.save_weights_to_hdf5_group(f, self.layers) + if save_format == 'h5': + with h5py.File(filepath, 'w') as f: + saving.save_weights_to_hdf5_group(f, self.layers) + else: + self._checkpointable_saver.save(filepath) def load_weights(self, filepath, by_name=False): - """Loads all layer weights from a HDF5 save file. - - If `by_name` is False (default) weights are loaded - based on the network's topology, meaning the architecture - should be the same as when the weights were saved. - Note that layers that don't have weights are not taken - into account in the topological ordering, so adding or - removing layers is fine as long as they don't have weights. - - If `by_name` is True, weights are loaded into layers - only if they share the same name. This is useful - for fine-tuning or transfer-learning models where + """Loads all layer weights, either from a TensorFlow or an HDF5 weight file. + + If `by_name` is False weights are loaded based on the network's + topology. This means the architecture should be the same as when the weights + were saved. 
Note that layers that don't have weights are not taken into + account in the topological ordering, so adding or removing layers is fine as + long as they don't have weights. + + If `by_name` is True, weights are loaded into layers only if they share the + same name. This is useful for fine-tuning or transfer-learning models where some of the layers have changed. + Only topological loading (`by_name=False`) is supported when loading weights + from the TensorFlow format. Note that topological loading differs slightly + between TensorFlow and HDF5 formats for user-defined classes inheriting from + `tf.keras.Model`: HDF5 loads based on a flattened list of weights, while the + TensorFlow format loads based on the object-local names of attributes to + which layers are assigned in the `Model`'s constructor. + Arguments: - filepath: String, path to the weights file to load. - by_name: Boolean, whether to load weights by name - or by topological order. + filepath: String, path to the weights file to load. For weight files in + TensorFlow format, this is the file prefix (the same as was passed + to `save_weights`). + by_name: Boolean, whether to load weights by name or by topological + order. Only topological loading is supported for weight files in + TensorFlow format. + + Returns: + When loading a weight file in TensorFlow format, returns the same status + object as `tf.train.Checkpoint.restore`. When graph building, restore + ops are run automatically as soon as the network is built (on first call + for user-defined classes inheriting from `Model`, immediately if it is + already built). + + When loading weights in HDF5 format, returns `None`. Raises: - ImportError: If h5py is not available. + ImportError: If h5py is not available and the weight file is in HDF5 + format. """ + if self._is_graph_network: + # Graph networks do not currently support TensorFlow formatted weight + # files. 
+ save_format = 'h5' + else: + save_format = None + if save_format is None: + try: + pywrap_tensorflow.NewCheckpointReader(filepath) + save_format = 'tf' + except errors_impl.DataLossError: + # The checkpoint is not readable in TensorFlow format. Try HDF5. + save_format = 'h5' + if save_format == 'tf': + status = self._checkpointable_saver.restore(filepath) + if by_name: + raise NotImplementedError( + 'Weights may only be loaded based on topology into Models when ' + 'loading TensorFlow-formatted weights (got by_name=True to ' + 'load_weights).') + if not context.executing_eagerly(): + finalizer = status.run_restore_ops + if self.built: + finalizer() + else: + # Hold on to this status object until the network is built (for + # subclassed Models). Then we'll run restore ops if necessary. + self._in_progress_restore_finalizer = finalizer + return status if h5py is None: - raise ImportError('`load_weights` requires h5py.') + raise ImportError( + '`load_weights` requires h5py when loading weights from HDF5.') + if self._is_graph_network and not self.built: + raise NotImplementedError( + 'Unable to load weights saved in HDF5 format into a subclassed ' + 'Model which has not created its variables yet. Call the Model ' + 'first, then load the weights.') with h5py.File(filepath, 'r') as f: if 'layer_names' not in f.attrs and 'model_weights' in f: f = f['model_weights'] @@ -1189,6 +1317,14 @@ class Network(base_layer.Layer): else: saving.load_weights_from_hdf5_group(f, self.layers) + def _post_build_cleanup(self): + super(Network, self)._post_build_cleanup() + if self._in_progress_restore_finalizer is not None: + # Runs queued restore operations left over from load_weights when graph + # building. + self._in_progress_restore_finalizer() + self._in_progress_restore_finalizer = None + def _updated_config(self): """Util shared between different serialization methods. 
diff --git a/tensorflow/python/keras/_impl/keras/engine/saving_test.py b/tensorflow/python/keras/_impl/keras/engine/saving_test.py index 3b1578cddf..8764ae5e9c 100644 --- a/tensorflow/python/keras/_impl/keras/engine/saving_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/saving_test.py @@ -24,7 +24,15 @@ import tempfile import numpy as np +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.keras._impl import keras +from tensorflow.python.keras._impl.keras.engine import training +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import random_ops from tensorflow.python.platform import test from tensorflow.python.training import training as training_module @@ -55,12 +63,16 @@ class TestWeightSavingAndLoading(test.TestCase): with self.assertRaises(ValueError): model.set_weights(weights[::-1]) - if h5py is None: - return # Skip rest of test if H5py isn't available. - temp_dir = self.get_temp_dir() self.addCleanup(shutil.rmtree, temp_dir) + no_extension_path = os.path.join(temp_dir, 'test') + with self.assertRaises(NotImplementedError): + model.save_weights(no_extension_path, save_format='tensorflow') + + if h5py is None: + return # Skip rest of test if H5py isn't available. 
+ h5_path = os.path.join(temp_dir, 'test.h5') model.save_weights(h5_path) model.load_weights(h5_path) @@ -71,6 +83,16 @@ class TestWeightSavingAndLoading(test.TestCase): y = model.predict(x) self.assertAllClose(ref_y, y) + model.save_weights(no_extension_path) + model.load_weights(no_extension_path) + y = model.predict(x) + self.assertAllClose(ref_y, y) + + model.save_weights(no_extension_path, save_format='hdf5') + model.load_weights(no_extension_path) + y = model.predict(x) + self.assertAllClose(ref_y, y) + def test_weight_preprocessing(self): input_dim = 3 output_dim = 3 @@ -457,5 +479,204 @@ class TestWholeModelSaving(test.TestCase): os.remove(fname) +class SubclassedModel(training.Model): + + def __init__(self): + super(SubclassedModel, self).__init__() + self.x_layer = keras.layers.Dense(3) + self.b_layer = keras.layers.Dense(1) + + def call(self, a): + return self.b_layer(self.x_layer(a)) + + +# TODO(allenl): The graph model tests in this TestCase are still saving in +# hdf5. Get them to save in tensorflow format. 
+class TestWeightSavingAndLoadingTFFormat(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def test_tensorflow_format_overwrite(self): + with self.test_session() as session: + model = SubclassedModel() + temp_dir = self.get_temp_dir() + prefix = os.path.join(temp_dir, 'ckpt') + + x = constant_op.constant(np.random.random((3, 2)), dtype=dtypes.float32) + executing_eagerly = context.executing_eagerly() + model(x) # pylint: disable=not-callable + if not executing_eagerly: + session.run([v.initializer for v in model.variables]) + model.save_weights(prefix, save_format='tensorflow') + model.save_weights(prefix, save_format='tensorflow', overwrite=True) + with self.assertRaises(EOFError): + # Indirectly tests that the user is prompted + model.save_weights(prefix, save_format='tensorflow', overwrite=False) + + def test_no_graph_pollution(self): + with context.graph_mode(): + graph = ops.Graph() + with graph.as_default(), self.test_session(graph) as session: + model = SubclassedModel() + temp_dir = self.get_temp_dir() + prefix = os.path.join(temp_dir, 'ckpt') + + x = constant_op.constant(np.random.random((3, 2)), dtype=dtypes.float32) + model(x) # pylint: disable=not-callable + session.run([v.initializer for v in model.variables]) + model.save_weights(prefix, save_format='tensorflow') + op_count = len(graph.get_operations()) + model.save_weights(prefix, save_format='tensorflow') + self.assertEqual(len(graph.get_operations()), op_count) + + model.load_weights(prefix) + op_count = len(graph.get_operations()) + model.load_weights(prefix) + self.assertEqual(len(graph.get_operations()), op_count) + + def _weight_loading_test_template(self, make_model_fn): + with self.test_session() as session: + model = make_model_fn() + temp_dir = self.get_temp_dir() + prefix = os.path.join(temp_dir, 'ckpt') + + x = constant_op.constant(np.random.random((3, 2)), dtype=dtypes.float32) + executing_eagerly = context.executing_eagerly() + ref_y_tensor = model(x) + if not 
executing_eagerly: + session.run([v.initializer for v in model.variables]) + ref_y = self.evaluate(ref_y_tensor) + model.save_weights(prefix) + for v in model.variables: + self.evaluate( + v.assign(random_ops.random_normal(shape=array_ops.shape(v)))) + + self.addCleanup(shutil.rmtree, temp_dir) + + model.load_weights(prefix) + y = self.evaluate(model(x)) + self.assertAllClose(ref_y, y) + + # Test restore-on-create if this is a subclassed Model (graph Networks + # will have already created their variables). + load_model = make_model_fn() + load_model.load_weights(prefix) + restore_on_create_y_tensor = load_model(x) + restore_on_create_y = self.evaluate(restore_on_create_y_tensor) + self.assertAllClose(ref_y, restore_on_create_y) + + @test_util.run_in_graph_and_eager_modes() + def test_weight_loading_graph_model(self): + def _make_graph_model(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3)(a) + b = keras.layers.Dense(1)(x) + return keras.models.Model(a, b) + + if h5py is None: + self.skipTest('This test only works with h5py.') + + self._weight_loading_test_template(_make_graph_model) + + @test_util.run_in_graph_and_eager_modes() + def test_weight_loading_subclassed_model(self): + self._weight_loading_test_template(SubclassedModel) + + def _new_layer_weight_loading_test_template( + self, first_model_fn, second_model_fn, restore_init_fn, by_name): + with self.test_session() as session: + model = first_model_fn() + temp_dir = self.get_temp_dir() + prefix = os.path.join(temp_dir, 'ckpt') + + x = constant_op.constant(np.random.random((3, 2)), dtype=dtypes.float32) + executing_eagerly = context.executing_eagerly() + ref_y_tensor = model(x) + if not executing_eagerly: + session.run([v.initializer for v in model.variables]) + ref_y = self.evaluate(ref_y_tensor) + model.save_weights(prefix) + for v in model.variables: + self.evaluate( + v.assign(random_ops.random_normal(shape=array_ops.shape(v)))) + + self.addCleanup(shutil.rmtree, temp_dir) + + 
second_model = second_model_fn() + second_model.load_weights(prefix, by_name=by_name) + second_model(x) + self.evaluate(restore_init_fn(second_model)) + second_model.save_weights(prefix) + # Check that the second model's checkpoint loads into the original model + model.load_weights(prefix, by_name=by_name) + y = self.evaluate(model(x)) + self.assertAllClose(ref_y, y) + + @test_util.run_in_graph_and_eager_modes() + def test_weight_loading_graph_model_added_layer(self): + def _save_graph_model(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3, name='first')(a) + b = keras.layers.Dense(1, name='second')(x) + return keras.models.Model(a, b) + def _restore_graph_model(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3, name='first')(a) + y = keras.layers.Dense(1, name='second')(x) + b = keras.layers.Dense(3, name='secondjr')(y) + return keras.models.Model(a, b) + def _restore_init_fn(restore_model): + return [v.initializer for v in restore_model.layers[-1].variables] + + if h5py is None: + self.skipTest('This test only works with h5py.') + + self._new_layer_weight_loading_test_template( + _save_graph_model, _restore_graph_model, + _restore_init_fn, by_name=True) + + @test_util.run_in_graph_and_eager_modes() + def test_weight_loading_graph_model_added_no_weight_layer(self): + def _save_graph_model(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3, name='first')(a) + b = keras.layers.Dense(1, name='second')(x) + return keras.models.Model(a, b) + def _restore_graph_model(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3, name='first')(a) + y = keras.layers.Dropout(rate=0.1)(x) + b = keras.layers.Dense(1, name='second')(y) + return keras.models.Model(a, b) + def _restore_init_fn(restore_model): + del restore_model # unused + return [] + if h5py is None: + self.skipTest('This test only works with h5py.') + + self._new_layer_weight_loading_test_template( + _save_graph_model, _restore_graph_model, + 
_restore_init_fn, by_name=False) + + @test_util.run_in_graph_and_eager_modes() + def test_weight_loading_subclassed_model_added_layer(self): + + class SubclassedModelRestore(training.Model): + + def __init__(self): + super(SubclassedModelRestore, self).__init__() + self.x_layer = keras.layers.Dense(3) + self.y_layer = keras.layers.Dense(3) + self.b_layer = keras.layers.Dense(1) + + def call(self, a): + return self.b_layer(self.y_layer(self.x_layer(a))) + + def _restore_init_fn(restore_model): + return [v.initializer for v in restore_model.y_layer.variables] + + self._new_layer_weight_loading_test_template( + SubclassedModel, SubclassedModelRestore, + _restore_init_fn, by_name=False) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 146e8fdac9..5f9b3e8c7d 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -584,6 +584,7 @@ class Model(Network): updates=updates, name='train_function', **self._function_kwargs) + self._post_build_cleanup() def _make_test_function(self): if not hasattr(self, 'test_function'): @@ -601,6 +602,7 @@ class Model(Network): updates=self.state_updates + self.metrics_updates, name='test_function', **self._function_kwargs) + self._post_build_cleanup() def _make_predict_function(self): if not hasattr(self, 'predict_function'): @@ -619,6 +621,7 @@ class Model(Network): updates=self.state_updates, name='predict_function', **kwargs) + self._post_build_cleanup() def _standardize_user_data(self, x, diff --git a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py index bc8698f235..295ad47f6b 100644 --- a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py +++ b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py @@ -19,7 +19,6 @@ from __future__ import division 
from __future__ import print_function import os -import tempfile import numpy as np import six @@ -420,8 +419,6 @@ class ModelSubclassingTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def test_saving(self): - if h5py is None: - return # Skip test if models cannot be saved. num_classes = (2, 3) num_samples = 100 @@ -437,20 +434,30 @@ class ModelSubclassingTest(test.TestCase): model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) y_ref_1, y_ref_2 = model.predict([x1, x2]) - fd, fname = tempfile.mkstemp('.h5') - model.save_weights(fname) + tf_format_name = os.path.join(self.get_temp_dir(), 'ckpt') + model.save_weights(tf_format_name) + if h5py is not None: + hdf5_format_name = os.path.join(self.get_temp_dir(), 'weights.h5') + model.save_weights(hdf5_format_name) model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - # need to build the model before loading weights - # (otherwise no weights to load) - model._set_inputs([x1, x2]) - model.load_weights(fname) + + if h5py is not None: + with self.assertRaises(ValueError): + model.load_weights(hdf5_format_name) + + model.load_weights(tf_format_name) y1, y2 = model.predict([x1, x2]) self.assertAllClose(y_ref_1, y1, atol=1e-5) self.assertAllClose(y_ref_2, y2, atol=1e-5) - os.close(fd) - os.remove(fname) + + if h5py is not None: + model.load_weights(hdf5_format_name) + + y1, y2 = model.predict([x1, x2]) + self.assertAllClose(y_ref_1, y1, atol=1e-5) + self.assertAllClose(y_ref_2, y2, atol=1e-5) @test_util.run_in_graph_and_eager_modes() def test_summary(self): diff --git a/tensorflow/python/training/checkpointable_utils.py b/tensorflow/python/training/checkpointable_utils.py index 4769e15120..13bd89d907 100644 --- a/tensorflow/python/training/checkpointable_utils.py +++ b/tensorflow/python/training/checkpointable_utils.py @@ -616,11 +616,10 @@ class CheckpointableSaver(object): # Allow passing in a weak reference to avoid reference cycles when # `Checkpointable` objects save themselves. 
self._root_checkpointable_ref = root_checkpointable - if not context.executing_eagerly(): - with ops.device("/cpu:0"): - self._file_prefix_placeholder = constant_op.constant("model") - else: - self._file_prefix_placeholder = None + # The file prefix placeholder is created lazily when graph building (and not + # at all when executing eagerly) to avoid creating ops in the constructor + # (when they may never be necessary). + self._file_prefix_placeholder = None # Op caching for save self._object_graph_feed_tensor = None @@ -778,6 +777,9 @@ class CheckpointableSaver(object): return InitializationOnlyStatus(self._root_checkpointable) in_graph_mode = not context.executing_eagerly() if in_graph_mode: + if self._file_prefix_placeholder is None: + with ops.device("/cpu:0"): + self._file_prefix_placeholder = constant_op.constant("model") file_prefix_tensor = self._file_prefix_placeholder file_prefix_feed_dict = {self._file_prefix_placeholder: save_path} else: diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt index cdf2da712f..cee76bdc1d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt @@ -239,7 +239,7 @@ tf_class { } member_method { name: "save_weights" - argspec: "args=[\'self\', \'filepath\', \'overwrite\'], varargs=None, keywords=None, defaults=[\'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt index 5c2c29e60f..02718cb5f9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt @@ -256,7 +256,7 @@ tf_class { } member_method { name: "save_weights" 
- argspec: "args=[\'self\', \'filepath\', \'overwrite\'], varargs=None, keywords=None, defaults=[\'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt index b3f3f16922..dd78384005 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt @@ -239,7 +239,7 @@ tf_class { } member_method { name: "save_weights" - argspec: "args=[\'self\', \'filepath\', \'overwrite\'], varargs=None, keywords=None, defaults=[\'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt index 4ac6811bac..9fcb03f47e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt @@ -256,7 +256,7 @@ tf_class { } member_method { name: "save_weights" - argspec: "args=[\'self\', \'filepath\', \'overwrite\'], varargs=None, keywords=None, defaults=[\'True\'], " + argspec: "args=[\'self\', \'filepath\', \'overwrite\', \'save_format\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index 9627475d84..8e8b2191e5 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -101,6 +101,7 @@ do_pylint() { "^tensorflow/contrib/eager/python/metrics_impl\.py.*\[E0202.*method-hidden "\ 
"^tensorflow/python/platform/gfile\.py.*\[E0301.*non-iterator "\ "^tensorflow/python/keras/_impl/keras/callbacks\.py.*\[E1133.*not-an-iterable "\ +"^tensorflow/python/keras/_impl/keras/engine/base_layer.py.*\[E0203.*access-member-before-definition "\ "^tensorflow/python/keras/_impl/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition "\ "^tensorflow/python/kernel_tests/constant_op_eager_test.py.*\[E0303.*invalid-length-returned" -- GitLab From 06d5ca2ae097c08c886759dd27f90b19e4c6f49d Mon Sep 17 00:00:00 2001 From: Andy Kernahan Date: Mon, 23 Apr 2018 20:32:35 +0100 Subject: [PATCH 155/434] Fix tfcompile module label. (#16582) --- tensorflow/docs_src/performance/xla/tfcompile.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/performance/xla/tfcompile.md b/tensorflow/docs_src/performance/xla/tfcompile.md index f57ca3948d..8521d7eacb 100644 --- a/tensorflow/docs_src/performance/xla/tfcompile.md +++ b/tensorflow/docs_src/performance/xla/tfcompile.md @@ -86,7 +86,7 @@ code. `tf_library` utilizes `tfcompile` to compile the TensorFlow graph into executable code. ```build -load("//third_party/tensorflow/compiler/aot:tfcompile.bzl", "tf_library") +load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library") # Use the tf_library macro to compile your graph into executable code. tf_library( @@ -258,8 +258,8 @@ file. ```build # Example of linking your binary -# Also see //third_party/tensorflow/compiler/aot/tests/BUILD -load("//third_party/tensorflow/compiler/aot:tfcompile.bzl", "tf_library") +# Also see //tensorflow/compiler/aot/tests/BUILD +load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library") # The same tf_library call from step 2 above. tf_library( -- GitLab From d9191b881fc283d93a8eaa4961c5e16f2205311f Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Mon, 23 Apr 2018 12:35:35 -0700 Subject: [PATCH 156/434] Re-enable metrics_test, increase sharding. 
PiperOrigin-RevId: 193967074 --- tensorflow/python/kernel_tests/BUILD | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 8628ca5d40..ebbec39cf3 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2877,11 +2877,8 @@ tf_py_test( "//tensorflow/python:random_ops", "//tensorflow/python:variables", ], - shard_count = 10, - tags = [ - "no_windows_gpu", - "noasan", - ], + shard_count = 20, + tags = ["no_windows_gpu"], ) tf_py_test( -- GitLab From 594c1c60f523ba4dd45545876e850ca7281be73a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 13:12:58 -0700 Subject: [PATCH 157/434] Entropy bottleneck class. PiperOrigin-RevId: 193972549 --- tensorflow/contrib/BUILD | 2 +- tensorflow/contrib/cmake/python_modules.txt | 1 + .../contrib/cmake/tf_core_kernels.cmake | 1 + tensorflow/contrib/coder/BUILD | 56 +- tensorflow/contrib/coder/__init__.py | 3 +- .../coder/python/layers/entropybottleneck.py | 697 ++++++++++++++++++ .../python/layers/entropybottleneck_test.py | 315 ++++++++ 7 files changed, 1071 insertions(+), 4 deletions(-) create mode 100644 tensorflow/contrib/coder/python/layers/entropybottleneck.py create mode 100644 tensorflow/contrib/coder/python/layers/entropybottleneck_test.py diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index d28392a62c..8edb8654b8 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -29,7 +29,7 @@ py_library( "//tensorflow/contrib/cloud:cloud_py", "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", "//tensorflow/contrib/cluster_resolver:cluster_resolver_py", - "//tensorflow/contrib/coder:coder_ops_py", + "//tensorflow/contrib/coder:coder_py", "//tensorflow/contrib/compiler:compiler_py", "//tensorflow/contrib/copy_graph:copy_graph_py", "//tensorflow/contrib/crf:crf_py", diff --git a/tensorflow/contrib/cmake/python_modules.txt 
b/tensorflow/contrib/cmake/python_modules.txt index fbcdf7e753..932a6eeeaa 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -144,6 +144,7 @@ tensorflow/contrib/coder tensorflow/contrib/coder/kernels tensorflow/contrib/coder/ops tensorflow/contrib/coder/python +tensorflow/contrib/coder/python/layers tensorflow/contrib/coder/python/ops tensorflow/contrib/compiler tensorflow/contrib/copy_graph diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index ed018b4fed..376496b33f 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -63,6 +63,7 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/split_handler_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/training_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/pmf_to_cdf_op.cc" "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder.cc" "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder_ops_util.cc" diff --git a/tensorflow/contrib/coder/BUILD b/tensorflow/contrib/coder/BUILD index 9ca4ce8a9c..a146460a9c 100644 --- a/tensorflow/contrib/coder/BUILD +++ b/tensorflow/contrib/coder/BUILD @@ -1,5 +1,5 @@ # Description: -# Contains entropy coding related modules. +# Contains tools related to data compression. 
package(default_visibility = [ "//learning/brain:__subpackages__", @@ -152,10 +152,21 @@ tf_gen_op_wrapper_py( deps = [":coder_ops_op_lib"], ) +py_library( + name = "coder_py", + srcs = [ + "__init__.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":coder_ops_py", + ":entropybottleneck_py", + ], +) + tf_custom_op_py_library( name = "coder_ops_py", srcs = [ - "__init__.py", "python/ops/coder_ops.py", ], dso = [ @@ -186,3 +197,44 @@ tf_py_test( ], main = "python/ops/coder_ops_test.py", ) + +py_library( + name = "entropybottleneck_py", + srcs = [ + "python/layers/entropybottleneck.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":coder_ops_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:functional_ops", + "//tensorflow/python:init_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:nn", + "//tensorflow/python:ops", + "//tensorflow/python:random_ops", + "//tensorflow/python:state_ops", + "//tensorflow/python:summary_ops", + "//tensorflow/python:tensor_shape", + "//tensorflow/python:variable_scope", + "//tensorflow/python/eager:context", + "//tensorflow/python/keras:engine", + "//third_party/py/numpy", + ], +) + +tf_py_test( + name = "entropybottleneck_py_test", + srcs = [ + "python/layers/entropybottleneck_test.py", + ], + additional_deps = [ + ":entropybottleneck_py", + "//tensorflow/python:client_testlib", + "//tensorflow/python:variables", + "//tensorflow/python:training", + ], + main = "python/layers/entropybottleneck_test.py", +) diff --git a/tensorflow/contrib/coder/__init__.py b/tensorflow/contrib/coder/__init__.py index b7e663e6f1..99b8ac7595 100644 --- a/tensorflow/contrib/coder/__init__.py +++ b/tensorflow/contrib/coder/__init__.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Entropy code operations.""" +"""Data compression tools.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function # pylint: disable=wildcard-import +from tensorflow.contrib.coder.python.layers.entropybottleneck import * from tensorflow.contrib.coder.python.ops.coder_ops import * # pylint: enable=wildcard-import diff --git a/tensorflow/contrib/coder/python/layers/entropybottleneck.py b/tensorflow/contrib/coder/python/layers/entropybottleneck.py new file mode 100644 index 0000000000..f039cb0f52 --- /dev/null +++ b/tensorflow/contrib/coder/python/layers/entropybottleneck.py @@ -0,0 +1,697 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Entropy bottleneck layer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.coder.python.ops import coder_ops + +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras._impl.keras import engine +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import functional_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.summary import summary + + +class EntropyBottleneck(engine.Layer): + """Entropy bottleneck layer. + + This layer can be used to model the entropy (the amount of information + conveyed) of the tensor passing through it. During training, this can be used + to impose a (soft) entropy constraint on its activations, limiting the amount + of information flowing through the layer. Note that this is distinct from + other types of bottlenecks, which reduce the dimensionality of the space, for + example. Dimensionality reduction does not limit the amount of information, + and does not enable efficient data compression per se. + + After training, this layer can be used to compress any input tensor to a + string, which may be written to a file, and to decompress a file which it + previously generated back to a reconstructed tensor (possibly on a different + machine having access to the same model checkpoint). 
The entropies estimated + during training or evaluation are approximately equal to the average length of + the strings in bits. + + The layer implements a flexible probability density model to estimate entropy, + which is described in the appendix of the paper (please cite the paper if you + use this code for scientific work): + + "Variational image compression with a scale hyperprior" + + Johannes Ballé, David Minnen, Saurabh Singh, Sung Jin Hwang, Nick Johnston + + https://arxiv.org/abs/1802.01436 + + The layer assumes that the input tensor is at least 2D, with a batch dimension + at the beginning and a channel dimension as specified by `data_format`. The + layer trains an independent probability density model for each channel, but + assumes that across all other dimensions, the inputs are i.i.d. (independent + and identically distributed). Because the entropy (and hence, average + codelength) is a function of the densities, this assumption may have a direct + effect on the compression performance. + + Because data compression always involves discretization, the outputs of the + layer are generally only approximations of its inputs. During training, + discretization is modeled using additive uniform noise to ensure + differentiability. The entropies computed during training are differential + entropies. During evaluation, the data is actually quantized, and the + entropies are discrete (Shannon entropies). To make sure the approximated + tensor values are good enough for practical purposes, the training phase must + be used to balance the quality of the approximation with the entropy, by + adding an entropy term to the training loss, as in the following example. + + Here, we use the entropy bottleneck to compress the latent representation of + an autoencoder. The data vectors `x` in this case are 4D tensors in + `'channels_last'` format (for example, 16x16 pixel grayscale images). 
+ + The layer always produces exactly one auxiliary loss and one update op which + are only significant for compression and decompression. To use the compression + feature, the auxiliary loss must be minimized during or after training. After + that, the update op must be executed at least once. Here, we simply attach + them to the main training step. + + Training: + ``` + # Build autoencoder. + x = tf.placeholder(tf.float32, shape=[None, 16, 16, 1]) + y = forward_transform(x) + entropy_bottleneck = EntropyBottleneck() + y_, likelihoods = entropy_bottleneck(y, training=True) + x_ = backward_transform(y_) + + # Information content (= predicted codelength) in bits of each batch element + # (note that taking the natural logarithm and dividing by `log(2)` is + # equivalent to taking base-2 logarithms): + bits = tf.reduce_sum(tf.log(likelihoods), axis=(1, 2, 3)) / -np.log(2) + + # Squared difference of each batch element: + squared_error = tf.reduce_sum(tf.squared_difference(x, x_), axis=(1, 2, 3)) + + # The loss is a weighted sum of mean squared error and entropy (average + # information content), where the weight controls the trade-off between + # approximation error and entropy. + main_loss = 0.5 * tf.reduce_mean(squared_error) + tf.reduce_mean(bits) + + # Minimize loss and auxiliary loss, and execute update op. + main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4) + main_step = main_optimizer.minimize(main_loss) + # 1e-2 is a good starting point for the learning rate of the auxiliary loss, + # assuming Adam is used. + aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-2) + aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0]) + step = tf.group(main_step, aux_step, entropy_bottleneck.updates[0]) + ``` + + Evaluation: + ``` + # Build autoencoder.
+ x = tf.placeholder(tf.float32, shape=[None, 16, 16, 1]) + y = forward_transform(x) + y_, likelihoods = EntropyBottleneck()(y, training=False) + x_ = backward_transform(y_) + + # Information content (= predicted codelength) in bits of each batch element: + bits = tf.reduce_sum(tf.log(likelihoods), axis=(1, 2, 3)) / -np.log(2) + + # Squared difference of each batch element: + squared_error = tf.reduce_sum(tf.squared_difference(x, x_), axis=(1, 2, 3)) + + # The loss is a weighted sum of mean squared error and entropy (average + # information content), where the weight controls the trade-off between + # approximation error and entropy. + loss = 0.5 * tf.reduce_mean(squared_error) + tf.reduce_mean(bits) + ``` + + To be able to compress the bottleneck tensor and decompress it in a different + session, or on a different machine, you need three items: + - The compressed representations stored as strings. + - The shape of the bottleneck for these string representations as a `Tensor`, + as well as the number of channels of the bottleneck at graph construction + time. + - The checkpoint of the trained model that was used for compression. Note: + It is crucial that the auxiliary loss produced by this layer is minimized + during or after training, and that the update op is run after training and + minimization of the auxiliary loss, but *before* the checkpoint is saved. + + Compression: + ``` + x = tf.placeholder(tf.float32, shape=[None, 16, 16, 1]) + y = forward_transform(x) + strings = EntropyBottleneck().compress(y) + shape = tf.shape(y)[1:] + ``` + + Decompression: + ``` + strings = tf.placeholder(tf.string, shape=[None]) + shape = tf.placeholder(tf.int32, shape=[3]) + entropy_bottleneck = EntropyBottleneck(dtype=tf.float32) + y_ = entropy_bottleneck.decompress(strings, shape, channels=5) + x_ = backward_transform(y_) + ``` + Here, we assumed that the tensor produced by the forward transform has 5 + channels. 
+ + The above four use cases can also be implemented within the same session (i.e. + on the same `EntropyBottleneck` instance), for testing purposes, etc., by + calling the object more than once. + + Arguments: + init_scale: Float. A scaling factor determining the initial width of the + probability densities. This should be chosen big enough so that the + range of values of the layer inputs roughly falls within the interval + [`-init_scale`, `init_scale`] at the beginning of training. + filters: An iterable of ints, giving the number of filters at each layer of + the density model. Generally, the more filters and layers, the more + expressive is the density model in terms of modeling more complicated + distributions of the layer inputs. For details, refer to the paper + referenced above. The default is `[3, 3, 3]`, which should be sufficient + for most practical purposes. + tail_mass: Float, between 0 and 1. The bottleneck layer automatically + determines the range of input values that should be represented based on + their frequency of occurrence. Values occurring in the tails of the + distributions will be clipped to that range during compression. + `tail_mass` determines the amount of probability mass in the tails which + is cut off in the worst case. For example, the default value of `1e-9` + means that at most 1 in a billion input samples will be clipped to the + range. + optimize_integer_offset: Boolean. Typically, the input values of this layer + are floats, which means that quantization during evaluation can be + performed with an arbitrary offset. By default, the layer determines that + offset automatically. In special situations, such as when it is known that + the layer will receive only full integer values during evaluation, it can + be desirable to set this argument to `False` instead, in order to always + quantize to full integer values. + likelihood_bound: Float. 
If positive, the returned likelihood values are + ensured to be greater than or equal to this value. This prevents very + large gradients with a typical entropy loss (defaults to 1e-9). + range_coder_precision: Integer, between 1 and 16. The precision of the range + coder used for compression and decompression. This trades off computation + speed with compression efficiency, where 16 is the slowest but most + efficient setting. Choosing lower values may increase the average + codelength slightly compared to the estimated entropies. + data_format: Either `'channels_first'` or `'channels_last'` (default). + trainable: Boolean. Whether the layer should be trained. + name: String. The name of the layer. + dtype: Default dtype of the layer's parameters (default of `None` means use + the type of the first input). + + Read-only properties: + init_scale: See above. + filters: See above. + tail_mass: See above. + optimize_integer_offset: See above. + likelihood_bound: See above. + range_coder_precision: See above. + data_format: See above. + name: String. See above. + dtype: See above. + trainable_variables: List of trainable variables. + non_trainable_variables: List of non-trainable variables. + variables: List of all variables of this layer, trainable and non-trainable. + updates: List of update ops of this layer. Always contains exactly one + update op, which must be run once after the last training step, before + `compress` or `decompress` is used. + losses: List of losses added by this layer. Always contains exactly one + auxiliary loss, which must be added to the training loss. + + Mutable properties: + trainable: Boolean. Whether the layer should be trained. + input_spec: Optional `InputSpec` object specifying the constraints on inputs + that can be accepted by the layer. 
+ """ + + def __init__(self, init_scale=10, filters=(3, 3, 3), tail_mass=1e-9, + optimize_integer_offset=True, likelihood_bound=1e-9, + range_coder_precision=16, data_format="channels_last", **kwargs): + super(EntropyBottleneck, self).__init__(**kwargs) + self._init_scale = float(init_scale) + self._filters = tuple(int(f) for f in filters) + self._tail_mass = float(tail_mass) + if not 0 < self.tail_mass < 1: + raise ValueError( + "`tail_mass` must be between 0 and 1, got {}.".format(self.tail_mass)) + self._optimize_integer_offset = bool(optimize_integer_offset) + self._likelihood_bound = float(likelihood_bound) + self._range_coder_precision = int(range_coder_precision) + self._data_format = data_format + self._channel_axis(2) # trigger ValueError early + self.input_spec = engine.InputSpec(min_ndim=2) + + @property + def init_scale(self): + return self._init_scale + + @property + def filters(self): + return self._filters + + @property + def tail_mass(self): + return self._tail_mass + + @property + def optimize_integer_offset(self): + return self._optimize_integer_offset + + @property + def likelihood_bound(self): + return self._likelihood_bound + + @property + def range_coder_precision(self): + return self._range_coder_precision + + @property + def data_format(self): + return self._data_format + + def _channel_axis(self, ndim): + try: + return {"channels_first": 1, "channels_last": ndim - 1}[self.data_format] + except KeyError: + raise ValueError("Unsupported `data_format` for {} layer: {}.".format( + self.__class__.__name__, self.data_format)) + + def _logits_cumulative(self, inputs, stop_gradient): + """Evaluate logits of the cumulative densities. + + Args: + inputs: The values at which to evaluate the cumulative densities, expected + to be a `Tensor` of shape `(channels, 1, batch)`. + stop_gradient: Boolean. 
Whether to add `array_ops.stop_gradient` calls so + that the gradient of the output with respect to the density model + parameters is disconnected (the gradient with respect to `inputs` is + left untouched). + + Returns: + A `Tensor` of the same shape as `inputs`, containing the logits of the + cumulative densities evaluated at the given inputs. + """ + logits = inputs + + for i in range(len(self.filters) + 1): + matrix = self._matrices[i] + if stop_gradient: + matrix = array_ops.stop_gradient(matrix) + logits = math_ops.matmul(matrix, logits) + + bias = self._biases[i] + if stop_gradient: + bias = array_ops.stop_gradient(bias) + logits += bias + + if i < len(self._factors): + factor = self._factors[i] + if stop_gradient: + factor = array_ops.stop_gradient(factor) + logits += factor * math_ops.tanh(logits) + + return logits + + def build(self, input_shape): + """Builds the layer. + + Creates the variables for the network modeling the densities, creates the + auxiliary loss estimating the median and tail quantiles of the densities, + and then uses that to create the probability mass functions and the update + op that produces the discrete cumulative density functions used by the range + coder. + + Args: + input_shape: Shape of the input tensor, used to get the number of + channels. + + Raises: + ValueError: if `input_shape` doesn't specify the length of the channel + dimension. + """ + input_shape = tensor_shape.TensorShape(input_shape) + channel_axis = self._channel_axis(input_shape.ndims) + channels = input_shape[channel_axis].value + if channels is None: + raise ValueError("The channel dimension of the inputs must be defined.") + self.input_spec = engine.InputSpec( + ndim=input_shape.ndims, axes={channel_axis: channels}) + filters = (1,) + self.filters + (1,) + scale = self.init_scale ** (1 / (len(self.filters) + 1)) + + # Create variables. 
+ self._matrices = [] + self._biases = [] + self._factors = [] + for i in range(len(self.filters) + 1): + init = np.log(np.expm1(1 / scale / filters[i + 1])) + matrix = self.add_variable( + "matrix_{}".format(i), dtype=self.dtype, + shape=(channels, filters[i + 1], filters[i]), + initializer=init_ops.Constant(init)) + matrix = nn.softplus(matrix) + self._matrices.append(matrix) + + bias = self.add_variable( + "bias_{}".format(i), dtype=self.dtype, + shape=(channels, filters[i + 1], 1), + initializer=init_ops.RandomUniform(-.5, .5)) + self._biases.append(bias) + + if i < len(self.filters): + factor = self.add_variable( + "factor_{}".format(i), dtype=self.dtype, + shape=(channels, filters[i + 1], 1), + initializer=init_ops.Zeros()) + factor = math_ops.tanh(factor) + self._factors.append(factor) + + # To figure out what range of the densities to sample, we need to compute + # the quantiles given by `tail_mass / 2` and `1 - tail_mass / 2`. Since we + # can't take inverses of the cumulative directly, we make it an optimization + # problem: + # `quantiles = argmin(|logit(cumulative) - target|)` + # where `target` is `logit(tail_mass / 2)` or `logit(1 - tail_mass / 2)`. + # Taking the logit (inverse of sigmoid) of the cumulative makes the + # representation of the right target more numerically stable. + + # Numerically stable way of computing logits of `tail_mass / 2` + # and `1 - tail_mass / 2`. + target = np.log(2 / self.tail_mass - 1) + # Compute lower and upper tail quantile as well as median. 
+ target = constant_op.constant([-target, 0, target], dtype=self.dtype) + + def quantiles_initializer(shape, dtype=None, partition_info=None): + del partition_info # unused + assert tuple(shape[1:]) == (1, 3) + init = constant_op.constant( + [[[-self.init_scale, 0, self.init_scale]]], dtype=dtype) + return array_ops.tile(init, (shape[0], 1, 1)) + + quantiles = self.add_variable( + "quantiles", shape=(channels, 1, 3), dtype=self.dtype, + initializer=quantiles_initializer) + logits = self._logits_cumulative(quantiles, stop_gradient=True) + loss = math_ops.reduce_sum(abs(logits - target)) + self.add_loss(loss, inputs=None) + + # Save medians for `call`, `compress`, and `decompress`. + self._medians = quantiles[:, :, 1:2] + if not self.optimize_integer_offset: + self._medians = math_ops.round(self._medians) + + # Largest distance observed between lower tail quantile and median, + # or between median and upper tail quantile. + minima = math_ops.reduce_max(self._medians - quantiles[:, :, 0:1]) + maxima = math_ops.reduce_max(quantiles[:, :, 2:3] - self._medians) + minmax = math_ops.maximum(minima, maxima) + minmax = math_ops.ceil(minmax) + minmax = math_ops.maximum(minmax, 1) + + # Sample the density up to `minmax` around the median. + samples = math_ops.range(-minmax, minmax + 1, dtype=self.dtype) + samples += self._medians + + half = constant_op.constant(.5, dtype=self.dtype) + # We strip the sigmoid from the end here, so we can use the special rule + # below to only compute differences in the left tail of the sigmoid. + # This increases numerical stability (see explanation in `call`). + lower = self._logits_cumulative(samples - half, stop_gradient=True) + upper = self._logits_cumulative(samples + half, stop_gradient=True) + # Flip signs if we can move more towards the left tail of the sigmoid. 
+ sign = -math_ops.sign(math_ops.add_n([lower, upper])) + pmf = abs(math_ops.sigmoid(sign * upper) - math_ops.sigmoid(sign * lower)) + # Add tail masses to first and last bin of pmf, as we clip values for + # compression, meaning that out-of-range values get mapped to these bins. + pmf = array_ops.concat([ + math_ops.add_n([pmf[:, 0, :1], math_ops.sigmoid(lower[:, 0, :1])]), + pmf[:, 0, 1:-1], + math_ops.add_n([pmf[:, 0, -1:], math_ops.sigmoid(-upper[:, 0, -1:])]), + ], axis=-1) + self._pmf = pmf + + cdf = coder_ops.pmf_to_quantized_cdf( + pmf, precision=self.range_coder_precision) + def cdf_getter(*args, **kwargs): + del args, kwargs # ignored + return variable_scope.get_variable( + "quantized_cdf", dtype=dtypes.int32, initializer=cdf, + trainable=False, validate_shape=False, collections=()) + # Need to provide a fake shape here since add_variable insists on it. + self._quantized_cdf = self.add_variable( + "quantized_cdf", shape=(channels, 1), dtype=dtypes.int32, + getter=cdf_getter, trainable=False) + + update_op = state_ops.assign( + self._quantized_cdf, cdf, validate_shape=False) + self.add_update(update_op, inputs=None) + + super(EntropyBottleneck, self).build(input_shape) + + def call(self, inputs, training): + """Pass a tensor through the bottleneck. + + Args: + inputs: The tensor to be passed through the bottleneck. + training: Boolean. If `True`, returns a differentiable approximation of + the inputs, and their likelihoods under the modeled probability + densities. If `False`, returns the quantized inputs and their + likelihoods under the corresponding probability mass function. These + quantities can't be used for training, as they are not differentiable, + but represent actual compression more closely. + + Returns: + values: `Tensor` with the same shape as `inputs` containing the perturbed + or quantized input values. 
+ likelihood: `Tensor` with the same shape as `inputs` containing the + likelihood of `values` under the modeled probability distributions. + + Raises: + ValueError: if `inputs` has different `dtype` or number of channels than + a previous set of inputs the model was invoked with earlier. + """ + inputs = ops.convert_to_tensor(inputs) + ndim = self.input_spec.ndim + channel_axis = self._channel_axis(ndim) + half = constant_op.constant(.5, dtype=self.dtype) + + # Convert to (channels, 1, batch) format by commuting channels to front + # and then collapsing. + order = list(range(ndim)) + order.pop(channel_axis) + order.insert(0, channel_axis) + values = array_ops.transpose(inputs, order) + shape = array_ops.shape(values) + values = array_ops.reshape(values, (shape[0], 1, -1)) + + # Add noise or quantize. + if training: + noise = random_ops.random_uniform(array_ops.shape(values), -half, half) + values = math_ops.add_n([values, noise]) + elif self.optimize_integer_offset: + values = math_ops.round(values - self._medians) + self._medians + else: + values = math_ops.round(values) + + # Evaluate densities. + # We can use the special rule below to only compute differences in the left + # tail of the sigmoid. This increases numerical stability: sigmoid(x) is 1 + # for large x, 0 for small x. Subtracting two numbers close to 0 can be done + # with much higher precision than subtracting two numbers close to 1. + lower = self._logits_cumulative(values - half, stop_gradient=False) + upper = self._logits_cumulative(values + half, stop_gradient=False) + # Flip signs if we can move more towards the left tail of the sigmoid. + sign = -math_ops.sign(math_ops.add_n([lower, upper])) + sign = array_ops.stop_gradient(sign) + likelihood = abs( + math_ops.sigmoid(sign * upper) - math_ops.sigmoid(sign * lower)) + if self.likelihood_bound > 0: + likelihood_bound = constant_op.constant( + self.likelihood_bound, dtype=self.dtype) + # TODO(jballe): Override gradients. 
+ likelihood = math_ops.maximum(likelihood, likelihood_bound) + + # Convert back to input tensor shape. + order = list(range(1, ndim)) + order.insert(channel_axis, 0) + values = array_ops.reshape(values, shape) + values = array_ops.transpose(values, order) + likelihood = array_ops.reshape(likelihood, shape) + likelihood = array_ops.transpose(likelihood, order) + + if not context.executing_eagerly(): + values_shape, likelihood_shape = self.compute_output_shape(inputs.shape) + values.set_shape(values_shape) + likelihood.set_shape(likelihood_shape) + + return values, likelihood + + def compress(self, inputs): + """Compress inputs and store their binary representations into strings. + + Args: + inputs: `Tensor` with values to be compressed. + + Returns: + String `Tensor` vector containing the compressed representation of each + batch element of `inputs`. + """ + with ops.name_scope(self._name_scope()): + inputs = ops.convert_to_tensor(inputs) + if not self.built: + # Check input assumptions set before layer building, e.g. input rank. + self._assert_input_compatibility(inputs) + if self.dtype is None: + self._dtype = inputs.dtype.base_dtype.name + self.build(inputs.shape) + + # Check input assumptions set after layer building, e.g. input shape. + if not context.executing_eagerly(): + self._assert_input_compatibility(inputs) + + ndim = self.input_spec.ndim + channel_axis = self._channel_axis(ndim) + # Tuple of slices for expanding dimensions of tensors below. + slices = ndim * [None] + [slice(None)] + slices[channel_axis] = slice(None) + slices = tuple(slices) + + # Expand dimensions of CDF to input dimensions, keeping the channels along + # the right dimension. + cdf = self._quantized_cdf[slices[1:]] + num_levels = array_ops.shape(cdf)[-1] - 1 + + # Bring inputs to the right range by centering the range on the medians. 
+ half = constant_op.constant(.5, dtype=self.dtype) + medians = array_ops.squeeze(self._medians, [1, 2]) + offsets = (math_ops.cast(num_levels // 2, self.dtype) + half) - medians + # Expand offsets to input dimensions and add to inputs. + values = inputs + offsets[slices[:-1]] + + # Clip to range and cast to integers. Because we have added .5 above, and + # all values are positive, the cast effectively implements rounding. + values = math_ops.maximum(values, half) + values = math_ops.minimum( + values, math_ops.cast(num_levels, self.dtype) - half) + values = math_ops.cast(values, dtypes.int16) + + def loop_body(tensor): + return coder_ops.range_encode( + tensor, cdf, precision=self.range_coder_precision) + strings = functional_ops.map_fn( + loop_body, values, dtype=dtypes.string, back_prop=False) + + if not context.executing_eagerly(): + strings.set_shape(inputs.shape[:1]) + + return strings + + def decompress(self, strings, shape, channels=None): + """Decompress values from their compressed string representations. + + Args: + strings: A string `Tensor` vector containing the compressed data. + shape: A `Tensor` vector of int32 type. Contains the shape of the tensor + to be decompressed, excluding the batch dimension. + channels: Integer. Specifies the number of channels statically. Needs only + be set if the layer hasn't been built yet (i.e., this is the first input + it receives). + + Returns: + The decompressed `Tensor`. Its shape will be equal to `shape` prepended + with the batch dimension from `strings`. + + Raises: + ValueError: If the length of `shape` isn't available at graph construction + time. 
+ """ + with ops.name_scope(self._name_scope()): + strings = ops.convert_to_tensor(strings) + shape = ops.convert_to_tensor(shape) + if self.built: + ndim = self.input_spec.ndim + channel_axis = self._channel_axis(ndim) + if channels is None: + channels = self.input_spec.axes[channel_axis] + else: + if not (shape.shape.is_fully_defined() and shape.shape.ndims == 1): + raise ValueError("`shape` must be a vector with known length.") + ndim = shape.shape[0].value + 1 + channel_axis = self._channel_axis(ndim) + input_shape = ndim * [None] + input_shape[channel_axis] = channels + self.build(input_shape) + + # Tuple of slices for expanding dimensions of tensors below. + slices = ndim * [None] + [slice(None)] + slices[channel_axis] = slice(None) + slices = tuple(slices) + + # Expand dimensions of CDF to input dimensions, keeping the channels along + # the right dimension. + cdf = self._quantized_cdf[slices[1:]] + num_levels = array_ops.shape(cdf)[-1] - 1 + + def loop_body(string): + return coder_ops.range_decode( + string, shape, cdf, precision=self.range_coder_precision) + outputs = functional_ops.map_fn( + loop_body, strings, dtype=dtypes.int16, back_prop=False) + outputs = math_ops.cast(outputs, self.dtype) + + medians = array_ops.squeeze(self._medians, [1, 2]) + offsets = math_ops.cast(num_levels // 2, self.dtype) - medians + outputs -= offsets[slices[:-1]] + + if not context.executing_eagerly(): + outputs_shape = ndim * [None] + outputs_shape[0] = strings.shape[0] + outputs_shape[channel_axis] = channels + outputs.set_shape(outputs_shape) + + return outputs + + def visualize(self): + """Multi-channel visualization of densities as images. + + Creates and returns an image summary visualizing the current probability + density estimates. The image contains one row for each channel. Within each + row, the pixel intensities are proportional to probability values, and each + row is centered on the median of the corresponding distribution.
+ + Returns: + The created image summary. + """ + with ops.name_scope(self._name_scope()): + image = self._pmf + image *= 255 / math_ops.reduce_max(image, axis=1, keepdims=True) + image = math_ops.cast(image + .5, dtypes.uint8) + image = image[None, :, :, None] + return summary.image("pmf", image, max_outputs=1) + + def compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape) + return input_shape, input_shape diff --git a/tensorflow/contrib/coder/python/layers/entropybottleneck_test.py b/tensorflow/contrib/coder/python/layers/entropybottleneck_test.py new file mode 100644 index 0000000000..798b0234eb --- /dev/null +++ b/tensorflow/contrib/coder/python/layers/entropybottleneck_test.py @@ -0,0 +1,315 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests of EntropyBottleneck class.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.coder.python.layers import entropybottleneck + +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import gradient_descent + + +class EntropyBottleneckTest(test.TestCase): + + def test_noise(self): + # Tests that the noise added is uniform noise between -0.5 and 0.5. + inputs = array_ops.placeholder(dtypes.float32, (None, 1)) + layer = entropybottleneck.EntropyBottleneck() + noisy, _ = layer(inputs, training=True) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + values = np.linspace(-50, 50, 100)[:, None] + noisy, = sess.run([noisy], {inputs: values}) + self.assertFalse(np.allclose(values, noisy, rtol=0, atol=.49)) + self.assertAllClose(values, noisy, rtol=0, atol=.5) + + def test_quantization(self): + # Tests that inputs are quantized to full integer values, even after + # quantiles have been updated. 
+ inputs = array_ops.placeholder(dtypes.float32, (None, 1)) + layer = entropybottleneck.EntropyBottleneck(optimize_integer_offset=False) + quantized, _ = layer(inputs, training=False) + opt = gradient_descent.GradientDescentOptimizer(learning_rate=1) + self.assertTrue(len(layer.losses) == 1) + step = opt.minimize(layer.losses[0]) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + sess.run(step) + values = np.linspace(-50, 50, 100)[:, None] + quantized, = sess.run([quantized], {inputs: values}) + self.assertAllClose(np.around(values), quantized, rtol=0, atol=1e-6) + + def test_quantization_optimized_offset(self): + # Tests that inputs are not quantized to full integer values after quantiles + # have been updated. However, the difference between input and output should + # be between -0.5 and 0.5, and the offset must be consistent. + inputs = array_ops.placeholder(dtypes.float32, (None, 1)) + layer = entropybottleneck.EntropyBottleneck(optimize_integer_offset=True) + quantized, _ = layer(inputs, training=False) + opt = gradient_descent.GradientDescentOptimizer(learning_rate=1) + self.assertTrue(len(layer.losses) == 1) + step = opt.minimize(layer.losses[0]) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + sess.run(step) + values = np.linspace(-50, 50, 100)[:, None] + quantized, = sess.run([quantized], {inputs: values}) + self.assertAllClose(values, quantized, rtol=0, atol=.5) + diff = np.ravel(np.around(values) - quantized) % 1 + self.assertAllClose(diff, np.full_like(diff, diff[0]), rtol=0, atol=5e-6) + self.assertNotEqual(diff[0], 0) + + def test_codec(self): + # Tests that inputs are compressed and decompressed correctly, and quantized + # to full integer values, even after quantiles have been updated. 
+ inputs = array_ops.placeholder(dtypes.float32, (1, None, 1)) + layer = entropybottleneck.EntropyBottleneck( + data_format="channels_last", init_scale=60, + optimize_integer_offset=False) + bitstrings = layer.compress(inputs) + decoded = layer.decompress(bitstrings, array_ops.shape(inputs)[1:]) + opt = gradient_descent.GradientDescentOptimizer(learning_rate=1) + self.assertTrue(len(layer.losses) == 1) + step = opt.minimize(layer.losses[0]) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + sess.run(step) + self.assertTrue(len(layer.updates) == 1) + sess.run(layer.updates[0]) + values = np.linspace(-50, 50, 100)[None, :, None] + decoded, = sess.run([decoded], {inputs: values}) + self.assertAllClose(np.around(values), decoded, rtol=0, atol=1e-6) + + def test_codec_optimized_offset(self): + # Tests that inputs are compressed and decompressed correctly, and not + # quantized to full integer values after quantiles have been updated. + # However, the difference between input and output should be between -0.5 + # and 0.5, and the offset must be consistent. 
+ inputs = array_ops.placeholder(dtypes.float32, (1, None, 1)) + layer = entropybottleneck.EntropyBottleneck( + data_format="channels_last", init_scale=60, + optimize_integer_offset=True) + bitstrings = layer.compress(inputs) + decoded = layer.decompress(bitstrings, array_ops.shape(inputs)[1:]) + opt = gradient_descent.GradientDescentOptimizer(learning_rate=1) + self.assertTrue(len(layer.losses) == 1) + step = opt.minimize(layer.losses[0]) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + sess.run(step) + self.assertTrue(len(layer.updates) == 1) + sess.run(layer.updates[0]) + values = np.linspace(-50, 50, 100)[None, :, None] + decoded, = sess.run([decoded], {inputs: values}) + self.assertAllClose(values, decoded, rtol=0, atol=.5) + diff = np.ravel(np.around(values) - decoded) % 1 + self.assertAllClose(diff, np.full_like(diff, diff[0]), rtol=0, atol=5e-6) + self.assertNotEqual(diff[0], 0) + + def test_codec_clipping(self): + # Tests that inputs are compressed and decompressed correctly, and clipped + # to the expected range. + inputs = array_ops.placeholder(dtypes.float32, (1, None, 1)) + layer = entropybottleneck.EntropyBottleneck( + data_format="channels_last", init_scale=40) + bitstrings = layer.compress(inputs) + decoded = layer.decompress(bitstrings, array_ops.shape(inputs)[1:]) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + self.assertTrue(len(layer.updates) == 1) + sess.run(layer.updates[0]) + values = np.linspace(-50, 50, 100)[None, :, None] + decoded, = sess.run([decoded], {inputs: values}) + expected = np.clip(np.around(values), -40, 40) + self.assertAllClose(expected, decoded, rtol=0, atol=1e-6) + + def test_channels_last(self): + # Test the layer with more than one channel and multiple input dimensions, + # with the channels in the last dimension. 
+ inputs = array_ops.placeholder(dtypes.float32, (None, None, None, 2)) + layer = entropybottleneck.EntropyBottleneck( + data_format="channels_last", init_scale=50) + noisy, _ = layer(inputs, training=True) + quantized, _ = layer(inputs, training=False) + bitstrings = layer.compress(inputs) + decoded = layer.decompress(bitstrings, array_ops.shape(inputs)[1:]) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + self.assertTrue(len(layer.updates) == 1) + sess.run(layer.updates[0]) + values = 5 * np.random.normal(size=(7, 5, 3, 2)) + noisy, quantized, decoded = sess.run( + [noisy, quantized, decoded], {inputs: values}) + self.assertAllClose(values, noisy, rtol=0, atol=.5) + self.assertAllClose(values, quantized, rtol=0, atol=.5) + self.assertAllClose(values, decoded, rtol=0, atol=.5) + + def test_channels_first(self): + # Test the layer with more than one channel and multiple input dimensions, + # with the channel dimension right after the batch dimension. + inputs = array_ops.placeholder(dtypes.float32, (None, 3, None, None)) + layer = entropybottleneck.EntropyBottleneck( + data_format="channels_first", init_scale=50) + noisy, _ = layer(inputs, training=True) + quantized, _ = layer(inputs, training=False) + bitstrings = layer.compress(inputs) + decoded = layer.decompress(bitstrings, array_ops.shape(inputs)[1:]) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + self.assertTrue(len(layer.updates) == 1) + sess.run(layer.updates[0]) + values = 5 * np.random.normal(size=(2, 3, 5, 7)) + noisy, quantized, decoded = sess.run( + [noisy, quantized, decoded], {inputs: values}) + self.assertAllClose(values, noisy, rtol=0, atol=.5) + self.assertAllClose(values, quantized, rtol=0, atol=.5) + self.assertAllClose(values, decoded, rtol=0, atol=.5) + + def test_compress(self): + # Test compression and decompression, and produce test data for + # `test_decompress`. 
If you set the constant at the end to `True`, this test + # will fail and the log will contain the new test data. + inputs = array_ops.placeholder(dtypes.float32, (2, 3, 10)) + layer = entropybottleneck.EntropyBottleneck( + data_format="channels_first", filters=(), init_scale=2) + bitstrings = layer.compress(inputs) + decoded = layer.decompress(bitstrings, array_ops.shape(inputs)[1:]) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + self.assertTrue(len(layer.updates) == 1) + sess.run(layer.updates[0]) + values = 5 * np.random.uniform(size=(2, 3, 10)) - 2.5 + bitstrings, quantized_cdf, decoded = sess.run( + [bitstrings, layer._quantized_cdf, decoded], {inputs: values}) + self.assertAllClose(values, decoded, rtol=0, atol=.5) + # Set this constant to `True` to log new test data for `test_decompress`. + if False: # pylint:disable=using-constant-test + assert False, (bitstrings, quantized_cdf, decoded) + + # Data generated by `test_compress`. + # pylint:disable=g-inconsistent-quotes,bad-whitespace + bitstrings = np.array([ + b'\x1e\xbag}\xc2\xdaN\x8b\xbd.', + b'\x8dF\xf0%\x1cv\xccllW' + ], dtype=object) + + quantized_cdf = np.array([ + [ 0, 15636, 22324, 30145, 38278, 65536], + [ 0, 19482, 26927, 35052, 42904, 65535], + [ 0, 21093, 28769, 36919, 44578, 65536] + ], dtype=np.int32) + + expected = np.array([ + [[-2., 1., 0., -2., -1., -2., -2., -2., 2., -1.], + [ 1., 2., 1., 0., -2., -2., 1., 2., 0., 1.], + [ 2., 0., -2., 2., 0., -1., -2., 0., 2., 0.]], + [[ 1., 2., 0., -1., 1., 2., 1., 1., 2., -2.], + [ 2., -1., -1., 0., -1., 2., 0., 2., -2., 2.], + [ 2., -2., -2., -1., -2., 1., -2., 0., 0., 0.]] + ], dtype=np.float32) + # pylint:enable=g-inconsistent-quotes,bad-whitespace + + def test_decompress(self): + # Test that decompression of values compressed with a previous version + # works, i.e. that the file format doesn't change across revisions. 
+ bitstrings = array_ops.placeholder(dtypes.string) + input_shape = array_ops.placeholder(dtypes.int32) + quantized_cdf = array_ops.placeholder(dtypes.int32) + layer = entropybottleneck.EntropyBottleneck( + data_format="channels_first", filters=(), dtype=dtypes.float32) + layer.build(self.expected.shape) + layer._quantized_cdf = quantized_cdf + decoded = layer.decompress(bitstrings, input_shape[1:]) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + decoded, = sess.run([decoded], { + bitstrings: self.bitstrings, input_shape: self.expected.shape, + quantized_cdf: self.quantized_cdf}) + self.assertAllClose(self.expected, decoded, rtol=0, atol=1e-6) + + def test_build_decompress(self): + # Test that layer can be built when `decompress` is the first call to it. + bitstrings = array_ops.placeholder(dtypes.string) + input_shape = array_ops.placeholder(dtypes.int32, shape=[3]) + layer = entropybottleneck.EntropyBottleneck(dtype=dtypes.float32) + layer.decompress(bitstrings, input_shape[1:], channels=5) + self.assertTrue(layer.built) + + def test_pmf_normalization(self): + # Test that probability mass functions are normalized correctly. + layer = entropybottleneck.EntropyBottleneck(dtype=dtypes.float32) + layer.build((None, 10)) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + pmf, = sess.run([layer._pmf]) + self.assertAllClose(np.ones(10), np.sum(pmf, axis=-1), rtol=0, atol=1e-6) + + def test_visualize(self): + # Test that summary op can be constructed. + layer = entropybottleneck.EntropyBottleneck(dtype=dtypes.float32) + layer.build((None, 10)) + summary = layer.visualize() + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + sess.run([summary]) + + def test_normalization(self): + # Test that densities are normalized correctly. 
+ inputs = array_ops.placeholder(dtypes.float32, (None, 1)) + layer = entropybottleneck.EntropyBottleneck(filters=(2,)) + _, likelihood = layer(inputs, training=True) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + x = np.repeat(np.arange(-200, 201), 1000)[:, None] + likelihood, = sess.run([likelihood], {inputs: x}) + self.assertEqual(x.shape, likelihood.shape) + integral = np.sum(likelihood) * .001 + self.assertAllClose(1, integral, rtol=0, atol=1e-4) + + def test_entropy_estimates(self): + # Test that entropy estimates match actual range coding. + inputs = array_ops.placeholder(dtypes.float32, (1, None, 1)) + layer = entropybottleneck.EntropyBottleneck( + filters=(2, 3), data_format="channels_last") + _, likelihood = layer(inputs, training=True) + diff_entropy = math_ops.reduce_sum(math_ops.log(likelihood)) / -np.log(2) + _, likelihood = layer(inputs, training=False) + disc_entropy = math_ops.reduce_sum(math_ops.log(likelihood)) / -np.log(2) + bitstrings = layer.compress(inputs) + with self.test_session() as sess: + sess.run(variables.global_variables_initializer()) + self.assertTrue(len(layer.updates) == 1) + sess.run(layer.updates[0]) + diff_entropy, disc_entropy, bitstrings = sess.run( + [diff_entropy, disc_entropy, bitstrings], + {inputs: np.random.normal(size=(1, 10000, 1))}) + codelength = 8 * sum(len(bitstring) for bitstring in bitstrings) + self.assertAllClose(diff_entropy, disc_entropy, rtol=5e-3, atol=0) + self.assertAllClose(disc_entropy, codelength, rtol=5e-3, atol=0) + self.assertGreater(codelength, disc_entropy) + + +if __name__ == "__main__": + test.main() -- GitLab From 8e544335e15029ccccbe743ee0fefaa344b62e4e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 13:28:01 -0700 Subject: [PATCH 158/434] Remove unused function from FunctionDefLibrary. 
PiperOrigin-RevId: 193974712 --- .../grappler/optimizers/function_optimizer.cc | 126 +++++++++++++++--- .../grappler/optimizers/function_optimizer.h | 6 +- .../optimizers/function_optimizer_test.cc | 32 ++--- .../grappler/optimizers/meta_optimizer.cc | 6 +- tensorflow/core/grappler/utils/functions.cc | 12 +- tensorflow/core/grappler/utils/functions.h | 40 ++++-- .../core/grappler/utils/functions_test.cc | 8 +- 7 files changed, 163 insertions(+), 67 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index d008a9719f..47e7dc0a96 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/graph_def_util.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/op_def.pb.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/graph/graph_constructor.h" @@ -75,12 +76,10 @@ string UniqueSpecializedFunctionName(const FunctionDef& func, class FunctionOptimizerContext { public: - explicit FunctionOptimizerContext(const GrapplerItem& item, - RewriterConfig::Toggle opt_level) - : opt_level_(opt_level), - function_library_(FunctionLibraryDefinition(OpRegistry::Global(), - item.graph.library())) { - InitializeInlinedFunctions(item); + explicit FunctionOptimizerContext(RewriterConfig::Toggle opt_level, + const GrapplerItem& item) + : function_library_(OpRegistry::Global(), item.graph.library()) { + InitializeInlinedFunctions(opt_level, item); } const FunctionLibraryDefinition& function_library() const { @@ -101,8 +100,9 @@ class FunctionOptimizerContext { } private: - void InitializeInlinedFunctions(const GrapplerItem& item) { - bool aggressive = opt_level_ == 
RewriterConfig::AGGRESSIVE; + void InitializeInlinedFunctions(RewriterConfig::Toggle opt_level, + const GrapplerItem& item) { + bool aggressive = opt_level == RewriterConfig::AGGRESSIVE; for (const FunctionDef& func : item.graph.library().function()) { // Can't create IdentityN nodes with no input or output: skip these @@ -120,7 +120,6 @@ class FunctionOptimizerContext { } } - RewriterConfig::Toggle opt_level_; FunctionLibraryDefinition function_library_; // Functions that can be inlined into optimized graph. std::unordered_map inlined_functions_; @@ -128,9 +127,93 @@ class FunctionOptimizerContext { TF_DISALLOW_COPY_AND_ASSIGN(FunctionOptimizerContext); }; +// Return trimmed FunctionDefLibrary with functions that are reachable from +// the optimized graph. +FunctionDefLibrary TrimFunctionLibrary(const FunctionLibraryDefinition& flib, + const GraphDef& optimized_graph) { + // Functions that are reachable from the optimized graph. + std::unordered_set keep_funcs; + + std::vector func_queue; + func_queue.reserve(flib.num_functions()); + + // Add registered and not already processed functions to the queue by name. + const auto add_to_func_queue = [&](const string& func_name) { + const FunctionDef* func = flib.Find(func_name); + if (func && keep_funcs.find(func_name) == keep_funcs.end()) { + func_queue.push_back(func); + } + }; + + // Find all the functions that are reachable from the given node. + const auto add_node_to_func_queue = [&](const NodeDef& node) { + // Node itself can be a call to the function. + add_to_func_queue(node.op()); + + // Or node can have an attribute referencing a function. + for (const auto& attr : node.attr()) { + const auto& attr_value = attr.second; + + // 1. AttrValue.func + if (attr_value.has_func()) { + add_to_func_queue(attr_value.func().name()); + } + + // 2. 
AttrValue.ListValue.func + if (attr_value.has_list()) { + for (const auto& func : attr_value.list().func()) { + add_to_func_queue(func.name()); + } + } + } + }; + + // Add all functions that are directly called from the optimized graph. + const auto& graph_nodes = optimized_graph.node(); + std::for_each(graph_nodes.begin(), graph_nodes.end(), add_node_to_func_queue); + + // Process all reachable functions. + while (!func_queue.empty()) { + const FunctionDef* func = func_queue.back(); + func_queue.pop_back(); + + const string& func_name = func->signature().name(); + keep_funcs.insert(func_name); + + // Find all the functions called from the function body. + const auto& func_body = func->node_def(); + std::for_each(func_body.begin(), func_body.end(), add_node_to_func_queue); + + // Check if the function has a registered gradient. + const string grad_func_name = flib.FindGradient(func_name); + if (!grad_func_name.empty()) add_to_func_queue(grad_func_name); + } + + FunctionDefLibrary lib; + for (const string& func_name : keep_funcs) { + const FunctionDef* func = CHECK_NOTNULL(flib.Find(func_name)); + *lib.add_function() = *func; + + const string grad_func_name = flib.FindGradient(func_name); + if (!grad_func_name.empty()) { + GradientDef* gd = lib.add_gradient(); + gd->set_function_name(func_name); + gd->set_gradient_func(grad_func_name); + } + } + + VLOG(3) << "Trimmed function library: " << keep_funcs.size() << " functions (" + << static_cast(keep_funcs.size() - flib.num_functions()) << ")"; + + return lib; +} + Status SpecializeFunction(const NodeDef& func_node, const FunctionDef& func, FunctionOptimizerContext* ctx, GraphDef* optimized_graph) { + VLOG(2) << "Specialize function instantiation: " + << SummarizeNodeDef(func_node); + const std::unordered_map func_attr( func_node.attr().begin(), func_node.attr().end()); @@ -141,20 +224,20 @@ Status SpecializeFunction(const NodeDef& func_node, const FunctionDef& func, TF_RETURN_IF_ERROR(MakeGrapplerFunctionItem(func, 
func_attr, flib, &item)); // TODO(ezhulenev): Push down const inputs and known input shapes. - FunctionDef specialized; - TF_RETURN_IF_ERROR(MakeSpecializedFunctionDef(item, flib, &specialized)); + FunctionDef specialized_func; + TF_RETURN_IF_ERROR(MakeFunctionDef(item, flib, &specialized_func)); // Find a name for specialized function. const string specialized_func_name = UniqueSpecializedFunctionName(func, func_node, flib); - specialized.mutable_signature()->set_name(specialized_func_name); - auto* specialized_attr = specialized.mutable_attr(); + specialized_func.mutable_signature()->set_name(specialized_func_name); + auto* specialized_attr = specialized_func.mutable_attr(); (*specialized_attr)[kGrapplerSpecializedFuncAttr].set_b(true); // Add specialized function to the library. TF_RETURN_IF_ERROR( - ctx->mutable_function_library().AddFunctionDef(specialized)); + ctx->mutable_function_library().AddFunctionDef(specialized_func)); // Add a function call node for the specialized function. 
NodeDef* specialized_func_node = optimized_graph->add_node(); @@ -226,6 +309,8 @@ Status HookInlinedFunctionOutputs( Status InlineFunction(const NodeDef& func_node, const FunctionDef& func, const FunctionOptimizerContext& ctx, GraphDef* optimized_graph) { + VLOG(2) << "Inline function instantiation: " << SummarizeNodeDef(func_node); + const std::unordered_map func_attr( func_node.attr().begin(), func_node.attr().end()); @@ -359,6 +444,8 @@ class SymbolicGradientEnv { Status InlineSymbolicGradient(const NodeDef& node, SymbolicGradientEnv* env, GraphDef* inlined_graph) { + VLOG(2) << "Inline symbolic gradient: " << SummarizeNodeDef(node); + GraphDef graph_def; // Create a node to anchor the gradient inputs @@ -454,13 +541,16 @@ Status InlineSymbolicGradient(const NodeDef& node, SymbolicGradientEnv* env, Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { + VLOG(1) << "Optimize Grappler item: id=" << item.id; + // Nothing to do here. if (item.graph.library().function_size() == 0) { + VLOG(3) << "Skip Grappler item with empty function library"; *optimized_graph = item.graph; return Status::OK(); } - FunctionOptimizerContext ctx(item, opt_level_); + FunctionOptimizerContext ctx(opt_level_, item); SymbolicGradientEnv env(item.graph.versions().producer(), item.graph.library()); @@ -506,9 +596,11 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph->add_node() = node; } - // TODO(bsteiner): trim the library to remove unused function definitions *optimized_graph->mutable_versions() = item.graph.versions(); - *optimized_graph->mutable_library() = ctx.function_library().ToProto(); + *optimized_graph->mutable_library() = + options_.enable_trim_function_library + ? 
TrimFunctionLibrary(ctx.function_library(), *optimized_graph) + : ctx.function_library().ToProto(); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.h b/tensorflow/core/grappler/optimizers/function_optimizer.h index c555fadf83..e307b4e533 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.h +++ b/tensorflow/core/grappler/optimizers/function_optimizer.h @@ -26,8 +26,9 @@ namespace grappler { // operations to make the overall graph more efficient. class FunctionOptimizer : public GraphOptimizer { public: - FunctionOptimizer(RewriterConfig::Toggle opt_level) : opt_level_(opt_level) {} - ~FunctionOptimizer() override {} + explicit FunctionOptimizer(RewriterConfig::Toggle opt_level) + : opt_level_(opt_level) {} + ~FunctionOptimizer() override = default; string name() const override { return "function_optimizer"; }; @@ -44,6 +45,7 @@ class FunctionOptimizer : public GraphOptimizer { bool enable_function_inlining = true; bool enable_function_specialization = true; bool enable_symbolic_gradient_inlining = true; + bool enable_trim_function_library = true; }; RewriterConfig::Toggle opt_level_; diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index fb006d4868..6147e8a27c 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -31,20 +31,8 @@ constexpr char kDevice[] = "/device:CPU:0"; class FunctionOptimizerTest : public GrapplerTest { protected: - void DisableAll(FunctionOptimizer* optimizer) { - optimizer->options_.enable_function_inlining = false; + void DisableFunctionSpecialization(FunctionOptimizer* optimizer) { optimizer->options_.enable_function_specialization = false; - optimizer->options_.enable_symbolic_gradient_inlining = false; - } - - void EnableOnlyFunctionInlining(FunctionOptimizer* optimizer) { - DisableAll(optimizer); - 
optimizer->options_.enable_function_inlining = true; - } - - void EnableOnlyFunctionSpecialization(FunctionOptimizer* optimizer) { - DisableAll(optimizer); - optimizer->options_.enable_function_specialization = true; } }; @@ -352,7 +340,7 @@ TEST_F(FunctionOptimizerTest, InlineFunction_FunctionWithoutInput) { using test::function::NDef; FunctionOptimizer optimizer(RewriterConfig::DEFAULT); - EnableOnlyFunctionInlining(&optimizer); + DisableFunctionSpecialization(&optimizer); // do not specialize noinline func const Tensor kTwo = test::AsScalar(2); FunctionDef func = FunctionDefHelper::Define( @@ -626,14 +614,13 @@ TEST_F(FunctionOptimizerTest, SpecializeFunction_XTimesTwo) { using test::function::NDef; FunctionOptimizer optimizer(RewriterConfig::DEFAULT); - EnableOnlyFunctionSpecialization(&optimizer); - // Mark XTimesTwo as noinline + // Mark XTimesTwo as noinline. FunctionDef x_times_two = test::function::XTimesTwo(); (*x_times_two.mutable_attr())["_noinline"].set_b(true); std::vector function_library = {x_times_two}; - // Build a graph to compute y = XTimesTwo(x) + // Build a graph to compute y = XTimesTwo(x). GrapplerItem item; item.graph = test::function::GDef( {NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice), @@ -644,12 +631,13 @@ TEST_F(FunctionOptimizerTest, SpecializeFunction_XTimesTwo) { GraphDef output; TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output)); - // Make sure that specialized function was added to the library - EXPECT_EQ(2, output.library().function_size()); + // Make sure that specialized function was added to the library and original + // function was removed. + EXPECT_EQ(1, output.library().function_size()); EXPECT_EQ("XTimesTwo_specialized_for_y", - output.library().function(1).signature().name()); + output.library().function(0).signature().name()); - // And 'y' node is calling specialized function + // And 'y' node is calling specialized function. 
int count = 0; for (const NodeDef& node : output.node()) { if (node.name() == "y" && count++) { @@ -658,7 +646,7 @@ TEST_F(FunctionOptimizerTest, SpecializeFunction_XTimesTwo) { } EXPECT_EQ(1, count); - // And that graph evaluation yields the same result + // And that graph evaluation yields the same result. Tensor pi = test::AsScalar(3.14f); item.fetch = {"z"}; item.feed.emplace_back("x", pi); diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 558b8a77e8..335fb403f1 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -219,11 +219,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, if (already_optimized) { TF_RETURN_IF_ERROR(TopologicalSort(optimized_graph)); ReassignColocation(optimized_graph); - // Make sure that the optimizers preserved the graph version and library. - DCHECK_GE(optimized_graph->library().function_size(), - item.graph.library().function_size()); - DCHECK_GE(optimized_graph->library().gradient_size(), - item.graph.library().gradient_size()); + // Make sure that the optimizers preserved the graph version. DCHECK_EQ(optimized_graph->versions().producer(), item.graph.versions().producer()); } diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc index 638fe1999a..790809bc67 100644 --- a/tensorflow/core/grappler/utils/functions.cc +++ b/tensorflow/core/grappler/utils/functions.cc @@ -545,6 +545,12 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func, return Status::OK(); } +Status MakeGrapplerFunctionItem(const FunctionDef& func, + const FunctionLibraryDefinition& flib, + GrapplerFunctionItem* item) { + return MakeGrapplerFunctionItem(func, AttrValueMap(), flib, item); +} + // Register GrapplerFunctionItem input arg expansion and function body outputs // in the GrapplerFunctionConnectivity. 
Status RegisterGrapplerFunctionConnectivity( @@ -560,9 +566,9 @@ Status RegisterGrapplerFunctionConnectivity( return Status::OK(); } -Status MakeSpecializedFunctionDef(const GrapplerFunctionItem& item, - const FunctionLibraryDefinition& flib, - FunctionDef* func) { +Status MakeFunctionDef(const GrapplerFunctionItem& item, + const FunctionLibraryDefinition& flib, + FunctionDef* func) { func->mutable_signature()->set_name(item.id); func->mutable_signature()->set_is_stateful(item.is_stateful()); diff --git a/tensorflow/core/grappler/utils/functions.h b/tensorflow/core/grappler/utils/functions.h index ab369bcad7..5e8b6c6960 100644 --- a/tensorflow/core/grappler/utils/functions.h +++ b/tensorflow/core/grappler/utils/functions.h @@ -38,7 +38,8 @@ using AttrValueMap = std::unordered_map; // function body in place of function inputs and a resolved input data type. struct InputArgExpansion { // TODO(ezhulenev): Add support for functions with tensor sequence inputs of - // different data types + // different data types. + // TODO(ezhulenev): Support type parametrized inputs? string input_name; // name of the function input argument DataType data_type; // input data type bool is_ref; // if true, inputs are required to be refs @@ -53,7 +54,8 @@ struct InputArgExpansion { // tensors of a function body nodes and a resolved output data type struct OutputArgExpansion { // TODO(ezhulenev): Add support for functions with tensor sequence outputs of - // different data types + // different data types. + // TODO(ezhulenev): Support type parametrized outputs? string output_name; // name of the function output argument DataType data_type; // output data type bool is_ref; // if true, outputs are refs @@ -186,13 +188,6 @@ bool HasParametrizedBody(const FunctionDef& func); // Check if function has parametrized type or body. bool IsParametrized(const FunctionDef& func); -// Make a GrapplerFunctionItem from the function definition and attributes. 
-// Return error if the given function def cannot be converted. -Status MakeGrapplerFunctionItem( - const FunctionDef& func, - const std::unordered_map& func_instantiation_attr, - const FunctionLibraryDefinition& flib, GrapplerFunctionItem* item); - // Register GrapplerFunctionItem input arg expansion and function body outputs // in the GrapplerFunctionConnectivity. Use function library definition to // lookup function body nodes output names and ranges. @@ -200,11 +195,28 @@ Status RegisterGrapplerFunctionConnectivity( const GrapplerFunctionItem& item, const FunctionLibraryDefinition& flib, GrapplerFunctionConnectivity* connectivity); -// Make a specialized FunctionDef from the GrapplerFunctionItem. Use function -// library definition to lookup function body nodes output names and ranges. -Status MakeSpecializedFunctionDef(const GrapplerFunctionItem& item, - const FunctionLibraryDefinition& flib, - FunctionDef* func); +// Make a GrapplerFunctionItem from the function definition and function +// instantiation attributes (caller node attributes). Returns error if the given +// function def cannot be converted (e.g. not all attributes are defined). +Status MakeGrapplerFunctionItem( + const FunctionDef& func, + const std::unordered_map& func_instantiation_attr, + const FunctionLibraryDefinition& flib, GrapplerFunctionItem* item); + +// Make a GrapplerFunctionItem from the function definition. Function must be +// fully defined (no type or body parametrization). +// TODO(ezhulenev): Support parametrized functions without fully defined +// instantiation attributes? Do we ever want to optimize parametrized function +// without specializing it to its instantiation attributes (at least types)? +Status MakeGrapplerFunctionItem(const FunctionDef& func, + const FunctionLibraryDefinition& flib, + GrapplerFunctionItem* item); + +// Make a FunctionDef from the GrapplerFunctionItem. Use function library +// definition to lookup function body nodes output names and ranges.
+Status MakeFunctionDef(const GrapplerFunctionItem& item, + const FunctionLibraryDefinition& flib, + FunctionDef* func); } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc index 54d235a8a4..6dfd49b943 100644 --- a/tensorflow/core/grappler/utils/functions_test.cc +++ b/tensorflow/core/grappler/utils/functions_test.cc @@ -524,7 +524,7 @@ TEST_F(FunctionsTest, FromFunctionDefWithoutInput) { EXPECT_EQ("two", cast.input(0)); } -TEST_F(FunctionsTest, MakeSpecializedFunctionDef) { +TEST_F(FunctionsTest, MakeFunctionDef) { const Tensor kTwo = test::AsScalar(2); FunctionDef func = FunctionDefHelper::Define( // Name @@ -550,7 +550,7 @@ TEST_F(FunctionsTest, MakeSpecializedFunctionDef) { TF_EXPECT_OK(MakeGrapplerFunctionItem(func, func_attr, flib, &item)); FunctionDef specialized; - TF_EXPECT_OK(MakeSpecializedFunctionDef(item, flib, &specialized)); + TF_EXPECT_OK(MakeFunctionDef(item, flib, &specialized)); // Input and output types are resolved based on instantiation attributes. EXPECT_EQ("x", specialized.signature().input_arg(0).name()); @@ -573,7 +573,7 @@ TEST_F(FunctionsTest, MakeSpecializedFunctionDef) { EXPECT_EQ(2, count); } -TEST_F(FunctionsTest, SwapFunctionBodyAndMakeSpecializedFunctionDef) { +TEST_F(FunctionsTest, SwapFunctionBodyAndMakeFunctionDef) { using test::function::NDef; FunctionDef mul_func = FunctionDefHelper::Create( @@ -606,7 +606,7 @@ TEST_F(FunctionsTest, SwapFunctionBodyAndMakeSpecializedFunctionDef) { // Replace function body with identity function item.SwapFunctionBody(std::move(id_func_body)); FunctionDef specialized; - TF_EXPECT_OK(MakeSpecializedFunctionDef(item, flib, &specialized)); + TF_EXPECT_OK(MakeFunctionDef(item, flib, &specialized)); // Check that graph body was updated. 
int count = 0; -- GitLab From 19ee0605b6eadb516703c37b7ba38e7122a6c51f Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Mon, 23 Apr 2018 13:43:13 -0700 Subject: [PATCH 159/434] Updating freeze_graph dependencies. PiperOrigin-RevId: 193977096 --- tensorflow/python/BUILD | 1 + tensorflow/python/tools/BUILD | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 698e2a28bf..9dc03d7cdb 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -70,6 +70,7 @@ py_library( srcs_version = "PY2AND3", visibility = [ "//tensorflow:__pkg__", + "//tensorflow/python/tools:__pkg__", ], deps = [ ":array_ops", diff --git a/tensorflow/python/tools/BUILD b/tensorflow/python/tools/BUILD index 84d20f8e36..6c34b6aaf3 100644 --- a/tensorflow/python/tools/BUILD +++ b/tensorflow/python/tools/BUILD @@ -38,9 +38,9 @@ py_library( deps = [ ":saved_model_utils", "//tensorflow/core:protos_all_py", - "//tensorflow/python", # TODO(b/34059704): remove when fixed "//tensorflow/python:client", "//tensorflow/python:framework", + "//tensorflow/python:no_contrib", # TODO(b/34059704): remove when fixed "//tensorflow/python:parsing_ops", "//tensorflow/python:platform", "//tensorflow/python:training", -- GitLab From 105c7df01b12b77bc17909cfb4a0d0c0aff87571 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 13:44:57 -0700 Subject: [PATCH 160/434] More relaxed size checking for TransposeConv, and miscellaneous bug fixes. 
PiperOrigin-RevId: 193977375 --- .../internal/optimized/optimized_ops.h | 3 + .../internal/reference/reference_ops.h | 3 + .../propagate_fixed_sizes.cc | 56 +++++++------------ .../resolve_constant_binary.cc | 7 ++- .../resolve_multiply_by_zero.cc | 5 ++ 5 files changed, 36 insertions(+), 38 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 49ce1133d3..d585bcca0e 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -5774,6 +5774,9 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, const std::vector& right_paddings, T* output_data, const Dims<4>& output_dims, const int32_t pad_value) { gemmlowp::ScopedProfilingLabel label("Pad"); + TFLITE_DCHECK_EQ(left_paddings.size(), 4); + TFLITE_DCHECK_EQ(right_paddings.size(), 4); + const int output_batch = ArraySize(output_dims, 3); const int output_height = ArraySize(output_dims, 2); const int output_width = ArraySize(output_dims, 1); diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index d1d4f54f86..ae295cc8b5 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -3065,6 +3065,9 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, const std::vector& left_paddings, const std::vector& right_paddings, T* output_data, const Dims<4>& output_dims, const int32_t pad_value) { + TFLITE_DCHECK_EQ(left_paddings.size(), 4); + TFLITE_DCHECK_EQ(right_paddings.size(), 4); + const int output_batch = ArraySize(output_dims, 3); const int output_height = ArraySize(output_dims, 2); const int output_width = ArraySize(output_dims, 1); diff --git 
a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index ba244cf5ef..7946492633 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -168,7 +168,9 @@ void ProcessConvOperator(Model* model, ConvOperator* op) { return; } const auto& input_shape = input_array.shape(); - CHECK_EQ(input_shape.dimensions_count(), 4); + CHECK(input_shape.dimensions_count() == 4) + << "Conv ops require 4D inputs. Input array \"" << op->inputs[0] + << "\" is " << input_shape.dimensions_count() << "D."; const auto& weights_array = model->GetArray(op->inputs[1]); // Yield until weights dims have been resolved. @@ -249,12 +251,6 @@ void ProcessTransposeConvOperator(Model* model, TransposeConvOperator* op) { << op->inputs[TransposeConvOperator::WEIGHTS] << "\" had shape " << toco::ShapeToString(weights_shape) << "."; - CHECK(weights_shape.dims(0) == 1 && weights_shape.dims(3) == 1) - << "TransposeConv weights dimensions must begin and end with 1. Input " - "weights \"" - << op->inputs[TransposeConvOperator::WEIGHTS] << "\" had shape " - << toco::ShapeToString(weights_shape) << "."; - // Compute padding const int kheight = weights_shape.dims(1); const int kwidth = weights_shape.dims(2); @@ -269,9 +265,7 @@ void ProcessTransposeConvOperator(Model* model, TransposeConvOperator* op) { LOG(FATAL) << "TransposeConv only supports SAME or VALID padding"; } - // VALIDATE OUTPUT SHAPE - // Compute the output shape from the input and weights shapes to verify it - // agrees with the specified output shape. + // VALIDATE some dimensions and set the output shape. 
const auto& input_array = model->GetArray(op->inputs[TransposeConvOperator::DATA_INPUT]); if (!input_array.has_shape()) { @@ -283,31 +277,13 @@ void ProcessTransposeConvOperator(Model* model, TransposeConvOperator* op) { << "TransposeConv input shape must have 4 dimensions. Input \"" << op->inputs[TransposeConvOperator::WEIGHTS] << "\" had shape " << toco::ShapeToString(weights_shape) << "."; + CHECK_EQ(input_shape.dims(3), weights_shape.dims(0)) + << "Input shape depth and weight depth do not agree"; - // Compute output shape - const int input_width = input_shape.dims(2); - const int input_height = input_shape.dims(1); - int output_height = op->stride_height * (input_height - 1); - int output_width = op->stride_width * (input_width - 1); - if (op->padding.type == PaddingType::kValid) { - output_height += kheight; - output_width += kwidth; - } else if (op->padding.type == PaddingType::kSame) { - output_height += 1; - output_width += 1; - } - - CHECK(specified_output_shape_array.GetBuffer().data == - std::vector({input_shape.dims(0), output_height, output_width, - weights_shape.dims(3)})) - << "Specified output shape: " << ShapeToString(output_array.shape()) - << ", does not agree with shape computed from input data and weights: [" - << input_shape.dims(0) << ", " << output_height << ", " << output_width - << ", " << weights_shape.dims(3) << "]."; - - // SUCCESS: Set the op's output shape according to the specified output shape. - *(output_array.mutable_shape()->mutable_dims()) = + // Set the output shape according to the specified output shape. 
+ std::vector const& specified_output_shape = specified_output_shape_array.GetBuffer().data; + *(output_array.mutable_shape()->mutable_dims()) = specified_output_shape; } void ProcessDepthwiseConvOperator(Model* model, DepthwiseConvOperator* op) { @@ -1179,6 +1155,11 @@ void ProcessRankOperator(Model* model, RankOperator* op) { return; } + if (output_array.data_type == ArrayDataType::kNone) { + // Yield until the output type has been set by PropagateArrayDataTypes + return; + } + const auto& input_array = model->GetArray(op->inputs[0]); if (!input_array.has_shape()) { // Yield until input dims have been resolved. @@ -1200,6 +1181,11 @@ void ProcessShapeOperator(Model* model, TensorFlowShapeOperator* op) { return; } + if (output_array.data_type == ArrayDataType::kNone) { + // Yield until the output type has been set by PropagateArrayDataTypes + return; + } + const auto& input_array = model->GetArray(op->inputs[0]); if (!input_array.has_shape()) { // Yield until input dims have been resolved. @@ -1230,10 +1216,6 @@ void ProcessStackOperator(Model* model, StackOperator* op) { } Shape shape = input_array.shape(); - if (shape.dimensions_count() == 0) { - // Convert 0D scalars to 1D scalars of shape {1}. - shape.mutable_dims()->push_back(1); - } if (!stacked_shape) { stacked_shape.reset(new Shape(shape)); } else { diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc index 5e779f6765..6e78653fad 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc @@ -233,7 +233,12 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { } // Check that input data types agree. 
- CHECK(input0_array.data_type == input1_array.data_type); + CHECK(input0_array.data_type == input1_array.data_type) + << "Dissimilar data types given to op outputting \"" + << binary_op->outputs[0] << "\". 0:\"" << binary_op->inputs[0] << "\"(" + << static_cast(input0_array.data_type) << ") 1:\"" + << binary_op->inputs[1] << "\"(" + << static_cast(input1_array.data_type) << ")."; // Do the actual constants propagation EvaluateBinaryOperatorOnConstantInputs(model, binary_op); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc index 37beb41dfc..4bb1217828 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc @@ -60,6 +60,11 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { const auto& output_array_name = mul_op->outputs[0]; auto& output_array = model->GetArray(output_array_name); + if (output_array.data_type == ArrayDataType::kNone) { + // Yield until the output type has been set by PropagateArrayDataTypes + return false; + } + // Yield if the output shape is not known yet. if (!output_array.has_shape()) { return false; -- GitLab From 5db49b64f244b89870aff89a13309796ae060620 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 14:05:40 -0700 Subject: [PATCH 161/434] [XLA] Add xla_builder and xla_computation to every test targets that will be migrated. 
PiperOrigin-RevId: 193981015 --- tensorflow/compiler/xla/tests/BUILD | 89 +++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 1f90a44d8b..25bbde1677 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -153,6 +153,8 @@ tf_cc_binary( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/client:client_library", "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service/cpu:cpu_compiler", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/core:lib", @@ -191,6 +193,7 @@ cc_library( "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:interpreter_plugin", # reference backend "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xla/tests:literal_test_util", @@ -288,6 +291,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", @@ -311,6 +316,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", 
"//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", @@ -330,6 +337,8 @@ xla_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", @@ -371,6 +380,8 @@ xla_test( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:test_utils", @@ -390,6 +401,7 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -442,6 +454,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -461,6 +475,8 @@ xla_test( 
"//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", @@ -478,6 +494,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -514,6 +532,8 @@ xla_test( "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -535,6 +555,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -554,6 +576,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", 
"//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", @@ -578,6 +602,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", @@ -604,6 +630,7 @@ xla_test( "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -670,6 +697,8 @@ xla_test( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", @@ -715,6 +744,8 @@ xla_test( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", 
"//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", @@ -738,6 +769,8 @@ xla_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", @@ -760,6 +793,8 @@ xla_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -813,6 +848,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:padding", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -836,6 +873,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:padding", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", 
"//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -898,6 +937,8 @@ xla_test( "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", @@ -923,6 +964,8 @@ xla_test( "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", @@ -963,6 +1006,8 @@ xla_test( "//tensorflow/compiler/xla:array3d", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1038,6 +1083,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1196,6 +1243,8 @@ xla_test( 
"//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1235,6 +1284,8 @@ xla_test( "//tensorflow/compiler/xla:reference_util", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1256,6 +1307,8 @@ xla_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1294,6 +1347,8 @@ xla_test( deps = [ "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1310,6 +1365,8 @@ xla_test( deps = [ "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + 
"//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1335,6 +1392,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", @@ -1355,6 +1414,8 @@ xla_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", @@ -1428,6 +1489,8 @@ xla_test( "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1472,6 +1535,8 @@ xla_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", 
"//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1514,6 +1579,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", @@ -1532,6 +1599,8 @@ xla_test( deps = [ "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1595,6 +1664,8 @@ xla_test( ":client_library_test_base", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], @@ -1608,6 +1679,8 @@ xla_test( ":client_library_test_base", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], @@ -1629,6 +1702,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", 
"//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:session_proto", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", @@ -1713,6 +1788,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_runner", "//tensorflow/compiler/xla/service:platform_util", @@ -1740,6 +1817,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_runner", "//tensorflow/compiler/xla/service:platform_util", @@ -1777,6 +1856,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:local_service", "//tensorflow/compiler/xla/service:shaped_buffer", "//tensorflow/compiler/xla/tests:literal_test_util", @@ -1802,6 +1883,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + 
"//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:device_memory_allocator", "//tensorflow/compiler/xla/service:local_service", "//tensorflow/compiler/xla/service:platform_util", @@ -1860,6 +1943,8 @@ xla_test( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", @@ -1886,6 +1971,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -1982,6 +2069,8 @@ xla_test( ":test_utils", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", ], -- GitLab From 01bc05347f430039c8efec10131b795178c9e302 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Mon, 23 Apr 2018 14:20:49 -0700 Subject: [PATCH 162/434] Run the canned estimator test on 2 GPUs as well. 
PiperOrigin-RevId: 193983700 --- .../contrib/distribute/python/estimator_integration_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/distribute/python/estimator_integration_test.py b/tensorflow/contrib/distribute/python/estimator_integration_test.py index c5a520ab5a..34410a6470 100644 --- a/tensorflow/contrib/distribute/python/estimator_integration_test.py +++ b/tensorflow/contrib/distribute/python/estimator_integration_test.py @@ -61,7 +61,8 @@ class DNNLinearCombinedClassifierIntegrationTest(test.TestCase, mode=['graph'], distribution=[ combinations.one_device_strategy, - combinations.mirrored_strategy_with_gpu_and_cpu + combinations.mirrored_strategy_with_gpu_and_cpu, + combinations.mirrored_strategy_with_two_gpus ])) def test_complete_flow_with_mode(self, distribution): label_dimension = 2 -- GitLab From d3b60b2210521a71961f675cb69bbe148b21b8da Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Mon, 23 Apr 2018 14:24:11 -0700 Subject: [PATCH 163/434] Reapply #18446. 
--- tensorflow/python/framework/test_util.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index f954b9d6c7..5a8bc43727 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -1014,6 +1014,8 @@ class TensorFlowTestCase(googletest.TestCase): config.graph_options.optimizer_options.opt_level = -1 config.graph_options.rewrite_options.constant_folding = ( rewriter_config_pb2.RewriterConfig.OFF) + config.graph_options.rewrite_options.arithmetic_optimization = ( + rewriter_config_pb2.RewriterConfig.OFF) return config if graph is None: -- GitLab From 1d54aeb8e1f89ac0d13eacca1eac863476f4ee0a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 23 Apr 2018 14:23:11 -0700 Subject: [PATCH 164/434] Simplified shape inference for queues PiperOrigin-RevId: 193984176 --- .../core/grappler/costs/graph_properties.cc | 16 ++++------------ .../core/grappler/costs/graph_properties.h | 2 +- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index a0125ce342..ca30ad83a0 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -1080,7 +1080,7 @@ Status GraphProperties::PropagateShapes( // fanout of the queues, we need to manually propagate the shapes from // enqueue node to the corresponding queue. 
TF_RETURN_IF_ERROR(UpdateResource(resource.first, resource.second, - shape_refiner, relax, new_shapes)); + shape_refiner, new_shapes)); } } while (!new_shapes->empty() && num_resource_iterations++ < max_resource_iterations); @@ -1094,7 +1094,7 @@ Status GraphProperties::PropagateShapes( Status GraphProperties::UpdateResource( const Node* qnode, const std::unordered_set& queue_inputs, - SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes) { + SymbolicShapeRefiner* shape_refiner, TopoQueue* new_shapes) { // Proceed only if qnode is a queue or an Enter with queue input. if (!IsQueue(*qnode) && !IsEnterWithQueue(*qnode)) { return Status::OK(); @@ -1108,9 +1108,6 @@ Status GraphProperties::UpdateResource( // Merge all inputs into the enqueue node, regardless of which phase we // are in. std::vector queue_shapes_and_types; - if (queue_handle_data) { - queue_shapes_and_types = *queue_handle_data; - } for (const auto& node : queue_inputs) { auto ctx = shape_refiner->GetContext(node); if (!ctx) { @@ -1126,13 +1123,8 @@ Status GraphProperties::UpdateResource( if (queue_shapes_and_types.empty()) { queue_shapes_and_types = shapes_and_types; } else { - if (relax) { - TF_RETURN_IF_ERROR(RelaxEnqueueShapesAndMergeTypes( - shape_refiner, qnode, shapes_and_types, &queue_shapes_and_types)); - } else { - TF_RETURN_IF_ERROR(MergeEnqueueShapesAndTypes( - shape_refiner, qnode, shapes_and_types, &queue_shapes_and_types)); - } + TF_RETURN_IF_ERROR(RelaxEnqueueShapesAndMergeTypes( + shape_refiner, qnode, shapes_and_types, &queue_shapes_and_types)); } } } diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index 4c3f3f5f53..a4e3031db1 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -93,7 +93,7 @@ class GraphProperties { // enqueue its fanout in 'new_shapes'. 
static Status UpdateResource( const Node* qnode, const std::unordered_set& queue_inputs, - SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes); + SymbolicShapeRefiner* shape_refiner, TopoQueue* new_shapes); // Update the output shapes of a Merge node, and enqueue its fanout in // new_shapes if needed. -- GitLab From d12244894aa0cdd068b46ebed407ced1915272b2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 14:39:53 -0700 Subject: [PATCH 165/434] Use %zu instead of %lu since size_t is not an unsigned long on 32-bit. PiperOrigin-RevId: 193987261 --- tensorflow/contrib/lite/optional_debug_tools.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/optional_debug_tools.cc b/tensorflow/contrib/lite/optional_debug_tools.cc index e0a0910117..dfdd80ea8a 100644 --- a/tensorflow/contrib/lite/optional_debug_tools.cc +++ b/tensorflow/contrib/lite/optional_debug_tools.cc @@ -72,7 +72,7 @@ const char* AllocTypeName(TfLiteAllocationType type) { // Prints a dump of what tensors and what nodes are in the interpreter. void PrintInterpreterState(Interpreter* interpreter) { - printf("Interpreter has %lu tensors and %lu nodes\n", + printf("Interpreter has %zu tensors and %zu nodes\n", interpreter->tensors_size(), interpreter->nodes_size()); printf("Inputs:"); PrintIntVector(interpreter->inputs()); -- GitLab From f97fec3cf5d361103d21989b78a74dd1820620d8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 14:58:58 -0700 Subject: [PATCH 166/434] Refactoring triangular_solve.cc to use the new common utility functions. 
PiperOrigin-RevId: 193990473 --- .../compiler/tf2xla/lib/triangular_solve.cc | 82 ++++++------------- 1 file changed, 25 insertions(+), 57 deletions(-) diff --git a/tensorflow/compiler/tf2xla/lib/triangular_solve.cc b/tensorflow/compiler/tf2xla/lib/triangular_solve.cc index 7f72a6073d..9bf5821b54 100644 --- a/tensorflow/compiler/tf2xla/lib/triangular_solve.cc +++ b/tensorflow/compiler/tf2xla/lib/triangular_solve.cc @@ -83,15 +83,6 @@ xla::StatusOr TriangularSolve( block_size); } - // Returns [b1, b2, ... , bn, indices[0], indices[1]]. - auto prepend_batch_dims = [&](std::array indices) { - std::vector output(ndims); - std::copy(batch_dimensions.begin(), batch_dimensions.end(), output.begin()); - std::copy(indices.begin(), indices.end(), - output.begin() + batch_dimensions.size()); - return output; - }; - // Applies a complex conjugation operation if `a` is complex and `conjugate_a` // is true, otherwise returns its argument. auto maybe_conj = [&](xla::ComputationBuilder* builder, @@ -108,11 +99,12 @@ xla::StatusOr TriangularSolve( std::unique_ptr sub = builder->CreateSubBuilder( tensorflow::strings::StrCat("trsm_base_", k)); - auto a_param = - sub->Parameter(0, - xla::ShapeUtil::MakeShape(b_shape->element_type(), - prepend_batch_dims({k, k})), - "a"); + auto a_param = sub->Parameter( + 0, + xla::ShapeUtil::MakeShape( + b_shape->element_type(), + PrependMajorDims(sub.get(), batch_dimensions, {k, k})), + "a"); std::array b_lastd; if (left_side) { @@ -120,11 +112,12 @@ xla::StatusOr TriangularSolve( } else { b_lastd = {m, k}; } - auto b_param = - sub->Parameter(1, - xla::ShapeUtil::MakeShape(b_shape->element_type(), - prepend_batch_dims(b_lastd)), - "b"); + auto b_param = sub->Parameter( + 1, + xla::ShapeUtil::MakeShape( + b_shape->element_type(), + PrependMajorDims(sub.get(), batch_dimensions, b_lastd)), + "b"); // We use a left-looking subroutine on the block diagonal in some common // cases, while falling back to a recursive call in unsupported cases. 
The @@ -380,14 +373,6 @@ xla::StatusOr TriangularSolveLeftLooking( batch_dimensions.push_back(a_size); } - auto prepend_batch_dims = [&](std::array indices) { - std::vector output(ndims); - std::copy(batch_dimensions.begin(), batch_dimensions.end(), output.begin()); - std::copy(indices.begin(), indices.end(), - output.begin() + batch_dimensions.size()); - return output; - }; - auto maybe_conj = [&](xla::ComputationBuilder* builder, xla::ComputationDataHandle x) { auto perform_conj = a_shape->element_type() == xla::C64 && conjugate_a; @@ -479,30 +464,6 @@ xla::StatusOr TriangularSolveLeftLooking( auto body_b = bodyb->GetTupleElement(input_tuple, 3); auto zero = bodyb->ConstantR0(0); - // Set up some helper functions. - auto prepend_zeros = [&](std::array starts) { - auto zero = bodyb->Reshape(bodyb->ConstantR0(0), {1}); - std::vector padded_starts(ndims, zero); - padded_starts[ndims - 2] = bodyb->Reshape(starts[0], {1}); - padded_starts[ndims - 1] = bodyb->Reshape(starts[1], {1}); - return bodyb->ConcatInDim(padded_starts, 0); - }; - - auto dynamic_slice = [&](xla::ComputationDataHandle x, - std::array starts, - std::array sizes) { - auto padded_starts = prepend_zeros(starts); - auto padded_sizes = prepend_batch_dims(sizes); - return bodyb->DynamicSlice(x, padded_starts, padded_sizes); - }; - - auto update = [&](xla::ComputationDataHandle x, - xla::ComputationDataHandle update, - std::array starts) { - auto padded_starts = prepend_zeros(starts); - return bodyb->DynamicUpdateSlice(x, update, padded_starts); - }; - // We'd like to implement this: // if transpose_a: // a_row = T(a[..., i+1:, i:i+1]) @@ -516,22 +477,29 @@ xla::StatusOr TriangularSolveLeftLooking( // all zeros and use that as zero-padding (doing unnecessary FLOPs). 
xla::ComputationDataHandle a_row; if (transpose_a) { - a_row = dynamic_slice(body_a, {zero, i}, {m, 1}); + TF_ASSIGN_OR_RETURN(a_row, DynamicSliceInMinorDims(bodyb.get(), body_a, + {zero, i}, {m, 1})); } else { - a_row = dynamic_slice(body_a, {i, zero}, {1, m}); + TF_ASSIGN_OR_RETURN(a_row, DynamicSliceInMinorDims(bodyb.get(), body_a, + {i, zero}, {1, m})); } TF_ASSIGN_OR_RETURN(auto b_update, BatchDot(bodyb.get(), a_row, body_out, /*transpose_x=*/transpose_a, /*transpose_y=*/false, /*conjugate_x=*/conjugate_a, /*conjugate_y=*/false)); - auto result_row = - bodyb->Sub(dynamic_slice(body_b, {i, zero}, {1, n}), b_update); + TF_ASSIGN_OR_RETURN( + auto result_row_slice, + DynamicSliceInMinorDims(bodyb.get(), body_b, {i, zero}, {1, n})); + auto result_row = bodyb->Sub(result_row_slice, b_update); // body_out[..., i:i+1, :] = result_row / a[..., i:i+1, i:i+1] - auto a_elt = dynamic_slice(body_a, {i, i}, {1, 1}); + TF_ASSIGN_OR_RETURN(auto a_elt, DynamicSliceInMinorDims(bodyb.get(), body_a, + {i, i}, {1, 1})); auto div_result = bodyb->Div(result_row, maybe_conj(bodyb.get(), a_elt)); - body_out = update(body_out, div_result, {i, zero}); + TF_ASSIGN_OR_RETURN(body_out, + DynamicUpdateSliceInMinorDims(bodyb.get(), body_out, + div_result, {i, zero})); // if transpose_a: // return (i - 1, body_out, a, b) -- GitLab From 6f6c75a7673cd73dfbaaba3f259ce9ab5c8086a1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 15:00:43 -0700 Subject: [PATCH 167/434] [XLA] Redesign: migrate xla/tests/a*, xla/tests/b*. 
PiperOrigin-RevId: 193990756 --- .../xla/tests/array_elementwise_ops_test.cc | 27 +++--- .../compiler/xla/tests/axpy_simple_test.cc | 5 +- .../tests/bad_rng_shape_validation_test.cc | 12 +-- .../compiler/xla/tests/bfloat16_test.cc | 13 ++- .../compiler/xla/tests/binop_scaling_test.cc | 14 ++-- .../xla/tests/broadcast_simple_test.cc | 82 +++++++++---------- .../xla/tests/client_library_test_base.cc | 8 ++ .../xla/tests/client_library_test_base.h | 3 + 8 files changed, 84 insertions(+), 80 deletions(-) diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 4b4dc6dd9d..e8a5efe796 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array3d.h" #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" @@ -214,7 +213,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantZeroElementC64s) { } XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantU64s) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector lhs{0xFFFFFFFF, static_cast(-1), @@ -255,7 +254,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantU64s) { } XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantS64s) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector lhs{static_cast(0x8000000000000000LL), static_cast(0x8000000000000000LL), @@ -1332,7 +1331,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, PowZeroElementF32s) { // Some Pow cases that can be implemented more efficiently. 
XLA_TEST_F(ArrayElementwiseOpTest, PowSpecialF32) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector values = {1.0f, 2.0f, 3.2f, -4.0f}; std::vector exponents = {0.0f, 1.0f, 2.0f, 0.5f, -1.0f, -0.5f}; @@ -1360,7 +1359,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, PowSpecialF32) { } XLA_TEST_F(ArrayElementwiseOpTest, PowOfExpF32) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector values0 = {1.0f, 2.0f, 3.2f, -4.0f, 0.0f, 5.7f}; std::vector values1 = {0.0f, 1.0f, 2.0f, 0.5f, -1.0f, -0.5f}; @@ -1385,7 +1384,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, PowOfExpF32) { } XLA_TEST_F(ArrayElementwiseOpTest, LogOfPowerF32) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector values0 = {1.0f, 2.0f, 3.2f, 4.0f, 0.5f, 5.7f}; std::vector values1 = {0.0f, 1.0f, 2.0f, 0.5f, -1.0f, -0.5f}; @@ -1410,7 +1409,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, LogOfPowerF32) { } XLA_TEST_F(ArrayElementwiseOpTest, MulOfExpF32) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector values0 = {1.0f, 2.0f, 3.2f, -4.0f, 0.0f, 5.7f}; std::vector values1 = {0.0f, 1.0f, 2.0f, 0.5f, -1.0f, -0.5f}; @@ -1435,7 +1434,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, MulOfExpF32) { } XLA_TEST_F(ArrayElementwiseOpTest, DivOfExpF32) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector values0 = {1.0f, 2.0f, 3.2f, -4.0f, 0.0f, 5.7f}; std::vector values1 = {0.0f, 1.0f, 2.0f, 0.5f, -1.0f, -0.5f}; @@ -1460,7 +1459,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivOfExpF32) { } XLA_TEST_F(ArrayElementwiseOpTest, Div3_lhs_F32) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector values0 = {1.0f, 2.0f, 3.2f, -4.0f, 0.45f, 5.7f}; std::vector values1 = {0.1f, 1.0f, 2.0f, 0.5f, -1.0f, -0.5f}; @@ -1492,7 +1491,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Div3_lhs_F32) { } XLA_TEST_F(ArrayElementwiseOpTest, Div3_rhs_F32) { - ComputationBuilder 
b(client_, TestName()); + XlaBuilder b(TestName()); std::vector values0 = {1.0f, 2.0f, 3.2f, -4.0f, 0.45f, 5.7f}; std::vector values1 = {0.1f, 1.0f, 2.0f, 0.5f, -1.0f, -0.5f}; @@ -1525,7 +1524,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Div3_rhs_F32) { } XLA_TEST_F(ArrayElementwiseOpTest, DivOfPowerF32) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector values0 = {1.0f, 2.0f, 3.2f, -4.0f, 0.45f, 5.7f}; std::vector values1 = {0.1f, 1.0f, 2.0f, 0.5f, 1.0f, 0.5f}; @@ -1558,7 +1557,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivOfPowerF32) { } XLA_TEST_F(ArrayElementwiseOpTest, Div4F32) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); std::vector values0 = {1.0f, 2.0f, 3.2f, -4.0f, 0.45f, 5.7f}; std::vector values1 = {0.1f, 1.0f, 2.0f, 0.5f, -1.0f, -0.5f}; @@ -2357,7 +2356,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo2DF32) { XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Eq) { // Test broadcasting in Eq comparison. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({42, 73}); auto m = builder.ConstantR2({{42, 73}, {42, 52}}); @@ -2783,7 +2782,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, NonIdentityBroadcastOfSameRankIsDisallowed) { // Regression test for b/31927799. "slice - y" is fused and requires implicit // broadcast. XLA_TEST_F(ArrayElementwiseOpTest, ImplictBroadcastInFusedExpressions) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x_literal = Literal::CreateR1({1, 2, 3}); auto y_literal = Literal::CreateR1({4, 5}); auto x_data = client_->TransferToServer(*x_literal).ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/tests/axpy_simple_test.cc b/tensorflow/compiler/xla/tests/axpy_simple_test.cc index ec3b46acfe..fcd9ff55e3 100644 --- a/tensorflow/compiler/xla/tests/axpy_simple_test.cc +++ b/tensorflow/compiler/xla/tests/axpy_simple_test.cc @@ -15,7 +15,6 @@ limitations under the License. 
#include -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" @@ -42,7 +41,7 @@ TEST_F(AxpySimpleTest, AxTenValues) { } XLA_TEST_F(AxpySimpleTest, AxpyZeroValues) { - ComputationBuilder builder(client_, "axpy_10"); + XlaBuilder builder("axpy_10"); auto alpha = builder.ConstantR0(3.1415926535); auto x = builder.ConstantR1({}); auto y = builder.ConstantR1({}); @@ -54,7 +53,7 @@ XLA_TEST_F(AxpySimpleTest, AxpyZeroValues) { } TEST_F(AxpySimpleTest, AxpyTenValues) { - ComputationBuilder builder(client_, "axpy_10"); + XlaBuilder builder("axpy_10"); auto alpha = builder.ConstantR0(3.1415926535); auto x = builder.ConstantR1( {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); diff --git a/tensorflow/compiler/xla/tests/bad_rng_shape_validation_test.cc b/tensorflow/compiler/xla/tests/bad_rng_shape_validation_test.cc index e4bf1827ac..22c3394e6f 100644 --- a/tensorflow/compiler/xla/tests/bad_rng_shape_validation_test.cc +++ b/tensorflow/compiler/xla/tests/bad_rng_shape_validation_test.cc @@ -18,9 +18,9 @@ limitations under the License. 
#include -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" @@ -34,13 +34,13 @@ namespace { class BadRngShapeValidationTest : public ClientLibraryTestBase {}; TEST_F(BadRngShapeValidationTest, DefaultConstructedShapeCreatesError) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto zero = builder.ConstantR0(0.0); auto one = builder.ConstantR0(1.0); Shape default_constructed; builder.RngUniform(zero, one, default_constructed); - StatusOr computation = builder.Build(); + StatusOr computation = builder.Build(); EXPECT_FALSE(computation.ok()); LOG(INFO) << "status received: " << computation.status(); EXPECT_THAT(computation.status().error_message(), @@ -48,7 +48,7 @@ TEST_F(BadRngShapeValidationTest, DefaultConstructedShapeCreatesError) { } TEST_F(BadRngShapeValidationTest, ShapeWithoutLayoutIsOk) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto zero = builder.ConstantR0(0.0); auto one = builder.ConstantR0(1.0); Shape sans_layout; @@ -57,7 +57,7 @@ TEST_F(BadRngShapeValidationTest, ShapeWithoutLayoutIsOk) { builder.RngUniform(zero, one, sans_layout); - StatusOr computation = builder.Build(); + StatusOr computation = builder.Build(); ASSERT_TRUE(computation.ok()); LOG(INFO) << computation.status(); } diff --git a/tensorflow/compiler/xla/tests/bfloat16_test.cc b/tensorflow/compiler/xla/tests/bfloat16_test.cc index b853dfaa15..4e65cf11f3 100644 --- a/tensorflow/compiler/xla/tests/bfloat16_test.cc +++ b/tensorflow/compiler/xla/tests/bfloat16_test.cc @@ -19,10 +19,9 @@ 
limitations under the License. #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -52,7 +51,7 @@ class Bfloat16Test : public ClientLibraryTestBase { }; XLA_TEST_F(Bfloat16Test, ScalarOperation) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.ConstantR0(static_cast(2.0f)); auto y = builder.ConstantR0(static_cast(1.0f)); builder.Add(x, y); @@ -62,7 +61,7 @@ XLA_TEST_F(Bfloat16Test, ScalarOperation) { } XLA_TEST_F(Bfloat16Test, LogOperation) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.ConstantR0(static_cast(4.0f)); builder.Log(x); @@ -71,7 +70,7 @@ XLA_TEST_F(Bfloat16Test, LogOperation) { } XLA_TEST_F(Bfloat16Test, NegateScalarF16) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); builder.Neg(builder.ConstantR0(static_cast(2.1f))); ComputeAndCompareR0(&builder, static_cast(-2.1f), {}, @@ -80,7 +79,7 @@ XLA_TEST_F(Bfloat16Test, NegateScalarF16) { XLA_TEST_F(Bfloat16Test, BatchNormTraining) { const int kFeatureIndex = 2; - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto operand = builder.ConstantR4FromArray4D( {{{{static_cast(1.f)}, {static_cast(2.f)}}, @@ -117,7 +116,7 @@ XLA_TEST_F(Bfloat16Test, BatchNormTraining) { XLA_TEST_F(Bfloat16Test, BatchNormGrad) { const int kFeatureIndex = 2; - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto 
operand = builder.ConstantR4FromArray4D( Array4D(2, 2, 2, 1, static_cast(0.0f))); diff --git a/tensorflow/compiler/xla/tests/binop_scaling_test.cc b/tensorflow/compiler/xla/tests/binop_scaling_test.cc index 97fec89b63..48203b1d40 100644 --- a/tensorflow/compiler/xla/tests/binop_scaling_test.cc +++ b/tensorflow/compiler/xla/tests/binop_scaling_test.cc @@ -15,8 +15,8 @@ limitations under the License. #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" @@ -32,7 +32,7 @@ TEST_F(BinopScalingTest, MatrixPlusPseudoMatrixRowVector_32x4) { auto alhs = MakeLinspaceArray2D(0.0, 1.0, 32, 4); auto arhs = MakeLinspaceArray2D(0.0, 1.0, 1, 4); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR2FromArray2D(*alhs); auto rhs = builder.ConstantR2FromArray2D(*arhs); builder.Add(lhs, rhs); @@ -48,7 +48,7 @@ TEST_F(BinopScalingTest, MatrixPlusPseudoMatrixRowVector_129x129) { auto alhs = MakeLinspaceArray2D(0.0, 1.0, 129, 129); auto arhs = MakeLinspaceArray2D(0.0, 1.0, 1, 129); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR2FromArray2D(*alhs); auto rhs = builder.ConstantR2FromArray2D(*arhs); builder.Add(lhs, rhs); @@ -64,7 +64,7 @@ TEST_F(BinopScalingTest, MatrixPlusPseudoMatrixColVector_9x5) { auto alhs = MakeLinspaceArray2D(0.0, 1.0, 9, 5); auto arhs = MakeLinspaceArray2D(0.0, 1.0, 9, 1); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR2FromArray2D(*alhs); auto rhs = 
builder.ConstantR2FromArray2D(*arhs); builder.Add(lhs, rhs); @@ -80,7 +80,7 @@ TEST_F(BinopScalingTest, MatrixPlusPseudoMatrixColVector_129x257) { auto alhs = MakeLinspaceArray2D(0.0, 1.0, 129, 257); auto arhs = MakeLinspaceArray2D(0.0, 1.0, 129, 1); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR2FromArray2D(*alhs); auto rhs = builder.ConstantR2FromArray2D(*arhs); builder.Add(lhs, rhs); @@ -93,7 +93,7 @@ TEST_F(BinopScalingTest, MatrixPlusPseudoMatrixColVector_129x257) { } TEST_F(BinopScalingTest, R0PlusR2F32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR0(42.0); auto rhs = builder.ConstantR2({ {1.0, 2.0}, {3.0, 4.0}, @@ -109,7 +109,7 @@ TEST_F(BinopScalingTest, R0PlusR2F32) { } TEST_F(BinopScalingTest, R4PlusR0S32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // clang-format off Array4D lhs_array({ {{{1, 2}, diff --git a/tensorflow/compiler/xla/tests/broadcast_simple_test.cc b/tensorflow/compiler/xla/tests/broadcast_simple_test.cc index 97095f1cc4..34c86e007b 100644 --- a/tensorflow/compiler/xla/tests/broadcast_simple_test.cc +++ b/tensorflow/compiler/xla/tests/broadcast_simple_test.cc @@ -19,8 +19,8 @@ limitations under the License. 
#include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/test.h" @@ -33,10 +33,8 @@ namespace { class BroadcastSimpleTest : public ClientLibraryTestBase { public: - ComputationDataHandle BuildBinOp(HloOpcode op, - const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs, - ComputationBuilder* builder) { + XlaOp BuildBinOp(HloOpcode op, const XlaOp& lhs, const XlaOp& rhs, + XlaBuilder* builder) { switch (op) { case HloOpcode::kMinimum: { return builder->Min(lhs, rhs); @@ -105,21 +103,21 @@ class BroadcastSimpleTest : public ClientLibraryTestBase { using ::testing::HasSubstr; XLA_TEST_F(BroadcastSimpleTest, ScalarNoOpBroadcast) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Broadcast(b.ConstantR0(1.5), {}); ComputeAndCompareR0(&b, 1.5, {}, ErrorSpec(0.0001)); } XLA_TEST_F(BroadcastSimpleTest, ScalarTo2D_2x3) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Broadcast(b.ConstantR0(2.25), {2, 3}); Array2D expected(2, 3, 2.25); ComputeAndCompareR2(&b, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(BroadcastSimpleTest, ScalarParamTo2D_2x3) { - ComputationBuilder b(client_, TestName()); - ComputationDataHandle src; + XlaBuilder b(TestName()); + XlaOp src; std::unique_ptr param_data = CreateR0Parameter(2.25f, /*parameter_number=*/0, /*name=*/"src", /*builder=*/&b, /*data_handle=*/&src); @@ -131,21 +129,21 @@ XLA_TEST_F(BroadcastSimpleTest, ScalarParamTo2D_2x3) { } XLA_TEST_F(BroadcastSimpleTest, ScalarTo2D_2x0) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Broadcast(b.ConstantR0(2.25), {2, 0}); Array2D expected(2, 0); 
ComputeAndCompareR2(&b, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(BroadcastSimpleTest, ScalarTo2D_0x2) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Broadcast(b.ConstantR0(2.25), {0, 2}); Array2D expected(0, 2); ComputeAndCompareR2(&b, expected, {}, ErrorSpec(0.0001)); } XLA_TEST_F(BroadcastSimpleTest, 1DTo2D) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Broadcast(b.ConstantR1({1, 2, 3}), {2}); Array2D expected(2, 3); @@ -160,7 +158,7 @@ XLA_TEST_F(BroadcastSimpleTest, 1DTo2D) { // Tests implicit broadcasting of PREDs. XLA_TEST_F(BroadcastSimpleTest, BooleanAnd2DTo3D_Pred) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); Array2D x_vals(2, 1); x_vals(0, 0) = true; @@ -171,7 +169,7 @@ XLA_TEST_F(BroadcastSimpleTest, BooleanAnd2DTo3D_Pred) { y_vals(1, 0, 0) = true; y_vals(1, 1, 0) = true; - ComputationDataHandle x, y; + XlaOp x, y; auto x_data = CreateR2Parameter(x_vals, 0, "x", &b, &x); auto y_data = CreateR3Parameter(y_vals, 1, "y", &b, &y); b.And(x, y, /*broadcast_dimensions=*/{1, 2}); @@ -186,7 +184,7 @@ XLA_TEST_F(BroadcastSimpleTest, BooleanAnd2DTo3D_Pred) { } XLA_TEST_F(BroadcastSimpleTest, ZeroElement_1DTo2D) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Broadcast(b.ConstantR1({}), {2}); Array2D expected(2, 0); @@ -194,7 +192,7 @@ XLA_TEST_F(BroadcastSimpleTest, ZeroElement_1DTo2D) { } XLA_TEST_F(BroadcastSimpleTest, 1DToZeroElement2D) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Broadcast(b.ConstantR1({1, 2, 3}), {0}); Array2D expected(0, 3); @@ -209,7 +207,7 @@ XLA_TEST_F(BroadcastSimpleTest, InDimensionAndDegenerateBroadcasting) { // broadcasting (broadcast_dimensions {1, 2}), then is added to the rhs shape // [2, 3, 1]. Degenerate dimension broadcasting then broadcasts the size one // dimensions. 
- ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Add(b.ConstantR2({{1.0, 5.0}}), b.ConstantLiteral(*Literal::CreateR3( @@ -247,7 +245,7 @@ class BroadcastR3ImplicitTest XLA_TEST_P(BroadcastR3ImplicitTest, Doit) { const R3ImplicitBroadcastSpec& spec = GetParam(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Shape r3_shape, r3_implicit_shape; Array3D r3_array(spec.output_bounds[0], spec.output_bounds[1], @@ -264,8 +262,7 @@ XLA_TEST_P(BroadcastR3ImplicitTest, Doit) { auto r3_implicit_parameter = builder.Parameter(0, r3_implicit_shape, "input"); auto r3_parameter = builder.Parameter(1, r3_shape, "input"); - ComputationDataHandle op = - BuildBinOp(spec.op, r3_implicit_parameter, r3_parameter, &builder); + XlaOp op = BuildBinOp(spec.op, r3_implicit_parameter, r3_parameter, &builder); Array3D expected_array(spec.output_bounds[0], spec.output_bounds[1], spec.output_bounds[2]); @@ -300,9 +297,9 @@ INSTANTIATE_TEST_CASE_P(BroadcastR3ImplicitTestInstances, // r1 and r3's dim0 matches, and r1's dim1 and dim2 have size 1: XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_1_2) { - ComputationBuilder b(client_, TestName()); - ComputationDataHandle r1h; - ComputationDataHandle r3h; + XlaBuilder b(TestName()); + XlaOp r1h; + XlaOp r3h; Array3D r1d = {{{1}}, {{2}}}; Array3D r3d = {{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}; @@ -319,7 +316,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_1_2) { } XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_0_1) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1 = b.ConstantLiteral(*Literal::CreateR3({{{1, 2}}})); auto r3 = b.ConstantLiteral( *Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); @@ -332,7 +329,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_0_1) { } XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_0_2) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1 = 
b.ConstantLiteral(*Literal::CreateR3({{{1}, {2}}})); auto r3 = b.ConstantLiteral( *Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); @@ -345,7 +342,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_0_2) { } XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_0) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1 = b.ConstantLiteral(*Literal::CreateR3({{{1, 2}, {3, 4}}})); auto r3 = b.ConstantLiteral( *Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); @@ -358,7 +355,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_0) { } XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_1) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1 = b.ConstantLiteral(*Literal::CreateR3({{{1, 2}}, {{3, 4}}})); auto r3 = b.ConstantLiteral( *Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); @@ -371,7 +368,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_1) { } XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_2) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1 = b.ConstantLiteral(*Literal::CreateR3({{{1}, {2}}, {{3}, {4}}})); auto r3 = b.ConstantLiteral( @@ -385,7 +382,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_2) { } XLA_TEST_F(BroadcastSimpleTest, Add3DTo3DDegenerate_0_1_2) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1 = b.ConstantLiteral(*Literal::CreateR3({{{1}}})); auto r3 = b.ConstantLiteral( *Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); @@ -491,7 +488,7 @@ class BroadcastR2ImplicitTest XLA_TEST_P(BroadcastR2ImplicitTest, Doit) { const R2ImplicitBroadcastSpec& spec = GetParam(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // Operands with degenerate dimensions require implicit broadcasting: Shape r2_shape, r2_implicit_shape1, r2_implicit_shape2; @@ -517,10 +514,9 @@ XLA_TEST_P(BroadcastR2ImplicitTest, Doit) { auto r2_implicit_parameter2 = 
builder.Parameter(2, r2_implicit_shape2, "input2"); - ComputationDataHandle op1 = + XlaOp op1 = BuildBinOp(spec.op1, r2_implicit_parameter1, r2_parameter, &builder); - ComputationDataHandle op2 = - BuildBinOp(spec.op2, op1, r2_implicit_parameter2, &builder); + XlaOp op2 = BuildBinOp(spec.op2, op1, r2_implicit_parameter2, &builder); Array2D expected_array(spec.output_bounds[0], spec.output_bounds[1]); @@ -547,7 +543,7 @@ INSTANTIATE_TEST_CASE_P(BroadcastR2ImplicitTestInstances, ::testing::ValuesIn(kR2ImplicitBroadcastTestCases)); XLA_TEST_F(BroadcastSimpleTest, Add2DTo2DDegenerate_0) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1 = b.ConstantLiteral(*Literal::CreateR2({{1, 2}})); auto r2 = b.ConstantLiteral(*Literal::CreateR2({{1, 2}, {3, 4}})); b.Add(r2, r1); @@ -558,7 +554,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add2DTo2DDegenerate_0) { } XLA_TEST_F(BroadcastSimpleTest, Add2DTo2DDegenerate_1) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1 = b.ConstantLiteral(*Literal::CreateR2({{1}, {2}})); auto r2 = b.ConstantLiteral(*Literal::CreateR2({{1, 2}, {3, 4}})); b.Add(r2, r1); @@ -569,7 +565,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add2DTo2DDegenerate_1) { } XLA_TEST_F(BroadcastSimpleTest, Add1DTo3DInDim0) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1 = b.ConstantR1({10, 20}); auto r3 = b.ConstantLiteral( *Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); @@ -582,7 +578,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add1DTo3DInDim0) { } XLA_TEST_F(BroadcastSimpleTest, Add1DTo3DInDim1) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1 = b.ConstantR1({10, 20}); auto r3 = b.ConstantLiteral( *Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); @@ -595,7 +591,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add1DTo3DInDim1) { } XLA_TEST_F(BroadcastSimpleTest, Add1DTo3DInDim2) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); 
auto r1 = b.ConstantR1({10, 20}); auto r3 = b.ConstantLiteral( *Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); @@ -608,7 +604,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add1DTo3DInDim2) { } XLA_TEST_F(BroadcastSimpleTest, Add1DTo3DInDimAll) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1_0 = b.ConstantR1({1000, 2000}); auto r1_1 = b.ConstantR1({100, 200}); auto r1_2 = b.ConstantR1({10, 20}); @@ -629,7 +625,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add1DTo3DInDimAll) { } XLA_TEST_F(BroadcastSimpleTest, Add1DTo3DInDimAllWithScalarBroadcast) { - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); auto r1_0 = b.ConstantR1({1000, 2000}); auto r1_1 = b.ConstantR1({100, 200}); auto r1_2 = b.ConstantR1({10, 20}); @@ -652,7 +648,7 @@ XLA_TEST_F(BroadcastSimpleTest, Add1DTo3DInDimAllWithScalarBroadcast) { XLA_TEST_F(BroadcastSimpleTest, InvalidBinaryAndDegenerateBroadcasting) { // Binary dimension broadcasting of the smaller lhs ([2, 2] up to [2, 2, 2]) // results in a shape incompatible with the lhs [2, 3, 1]. - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Add(b.ConstantR2({{1.0, 5.0}, {1.0, 5.0}}), b.ConstantLiteral(*Literal::CreateR3( @@ -667,7 +663,7 @@ XLA_TEST_F(BroadcastSimpleTest, InvalidBinaryAndDegenerateBroadcasting) { XLA_TEST_F(BroadcastSimpleTest, InvalidInDimensionBroadcasting) { // Test invalid broadcasting with [1, 2] and [2, 3] inputs. - ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Add(b.ConstantR2({{1.0, 2.0}}), b.ConstantR2({{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}})); @@ -680,7 +676,7 @@ XLA_TEST_F(BroadcastSimpleTest, InvalidInDimensionBroadcasting) { XLA_TEST_F(BroadcastSimpleTest, InvalidDegenerateBroadcasting) { // Test invalid broadcasting with [1, 2] and [2, 3] inputs. 
- ComputationBuilder b(client_, TestName()); + XlaBuilder b(TestName()); b.Add(b.ConstantR2({{1.0, 2.0}}), b.ConstantR2({{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}})); diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index 69389dae3f..31c9e21644 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -211,6 +211,14 @@ void ClientLibraryTestBase::ComputeAndCompareR1( arguments); } +void ClientLibraryTestBase::ComputeAndCompareR1( + XlaBuilder* builder, const tensorflow::core::Bitmap& expected, + tensorflow::gtl::ArraySlice arguments) { + std::unique_ptr expected_literal = Literal::CreateR1(expected); + ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, + arguments); +} + template void ClientLibraryTestBase::ComputeAndCompareLiteral( BuilderT* builder, const Literal& expected, diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 481d7c5c25..85ebe29ae9 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -165,6 +165,9 @@ class ClientLibraryTestBase : public ::testing::Test { void ComputeAndCompareR1(ComputationBuilder* builder, const tensorflow::core::Bitmap& expected, tensorflow::gtl::ArraySlice arguments); + void ComputeAndCompareR1(XlaBuilder* builder, + const tensorflow::core::Bitmap& expected, + tensorflow::gtl::ArraySlice arguments); template void ComputeAndCompareR2(BuilderT* builder, const Array2D& expected, -- GitLab From 9e1d93d28fe30171de3f6838028eeadb44b0d6fd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 15:15:25 -0700 Subject: [PATCH 168/434] Changing tf.foldl and tf.foldr to accept multiple/nested tensors as element/initializer. 
PiperOrigin-RevId: 193993295 --- .../kernel_tests/functional_ops_test.py | 40 +++++++ tensorflow/python/ops/functional_ops.py | 100 ++++++++++++------ 2 files changed, 110 insertions(+), 30 deletions(-) diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py index 34fb655035..5f48be94da 100644 --- a/tensorflow/python/kernel_tests/functional_ops_test.py +++ b/tensorflow/python/kernel_tests/functional_ops_test.py @@ -70,6 +70,26 @@ class FunctionalOpsTest(test.TestCase): initializer=10) self.assertAllEqual(880, self.evaluate(r)) + @test_util.run_in_graph_and_eager_modes() + def testFoldl_SingleInputMultiOutput(self): + with self.test_session(): + elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) + initializer = np.array([1, -1.0]) + r = functional_ops.foldl(lambda a, x: a + x, elems, initializer) + r_value = self.evaluate(r) + + self.assertAllEqual(22, r_value[0]) + self.assertAllEqual(20, r_value[1]) + + @test_util.run_in_graph_and_eager_modes() + def testFoldl_MultiInputSingleOutput(self): + with self.test_session(): + elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) + initializer = np.array(1.0) + r = functional_ops.foldl(lambda a, x: a + x[0] + x[1], (elems, -elems), + initializer) + self.assertAllEqual(1, self.evaluate(r)) + def testFoldl_Scoped(self): with self.test_session() as sess: with variable_scope.variable_scope("root") as varscope: @@ -105,6 +125,26 @@ class FunctionalOpsTest(test.TestCase): initializer=10) self.assertAllEqual(1282, self.evaluate(r)) + @test_util.run_in_graph_and_eager_modes() + def testFoldr_SingleInputMultiOutput(self): + with self.test_session(): + elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) + initializer = np.array([1, -1.0]) + r = functional_ops.foldr(lambda a, x: a + x, elems, initializer) + r_value = self.evaluate(r) + + self.assertAllEqual(22, r_value[0]) + self.assertAllEqual(20, r_value[1]) + + @test_util.run_in_graph_and_eager_modes() + def 
testFoldr_MultiInputSingleOutput(self): + with self.test_session(): + elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) + initializer = np.array(1.0) + r = functional_ops.foldr(lambda a, x: a + x[0] + x[1], (elems, -elems), + initializer) + self.assertAllEqual(1, self.evaluate(r)) + def testFoldr_Scoped(self): with self.test_session() as sess: with variable_scope.variable_scope("root") as varscope: diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index 161f6f3659..1b3a1e5cbc 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -65,10 +65,20 @@ def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, Suppose that `elems` is unpacked into `values`, a list of tensors. The shape of the result tensor is fn(initializer, values[0]).shape`. + This method also allows multi-arity `elems` and output of `fn`. If `elems` + is a (possibly nested) list or tuple of tensors, then each of these tensors + must have a matching first (unpack) dimension. The signature of `fn` may + match the structure of `elems`. That is, if `elems` is + `(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is: + `fn = lambda (t1, [t2, t3, [t4, t5]]):`. + Args: fn: The callable to be performed. - elems: A tensor to be unpacked on dimension 0. - initializer: (optional) The initial value for the accumulator. + elems: A tensor or (possibly nested) sequence of tensors, each of which + will be unpacked along their first dimension. The nested sequence + of the resulting slices will be the first argument to `fn`. + initializer: (optional) A tensor or (possibly nested) sequence of tensors, + as the initial value for the accumulator. parallel_iterations: (optional) The number of iterations allowed to run in parallel. back_prop: (optional) True enables support for back propagation. 
@@ -76,8 +86,9 @@ def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, name: (optional) Name prefix for the returned tensors. Returns: - A tensor resulting from applying `fn` consecutively to the list of tensors - unpacked from `elems`, from first to last. + A tensor or (possibly nested) sequence of tensors, resulting from applying + `fn` consecutively to the list of tensors unpacked from `elems`, from first + to last. Raises: TypeError: if `fn` is not callable. @@ -92,6 +103,11 @@ def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, if not callable(fn): raise TypeError("fn must be callable.") + def create_ta(elem): + return tensor_array_ops.TensorArray( + dtype=elem.dtype, size=n, dynamic_size=False, + infer_shape=True).unstack(elem) + in_graph_mode = not context.executing_eagerly() with ops.name_scope(name, "foldl", [elems]): # TODO(akshayka): Remove the in_graph_mode check once caching devices are @@ -107,24 +123,26 @@ def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, varscope.set_caching_device(lambda op: op.device) varscope_caching_device_was_none = True - # Convert elems to tensor array. - elems = ops.convert_to_tensor(elems, name="elems") - n = array_ops.shape(elems)[0] - elems_ta = tensor_array_ops.TensorArray(dtype=elems.dtype, size=n, - dynamic_size=False, - infer_shape=True) - elems_ta = elems_ta.unstack(elems) + # Convert elems to tensor array. n may be known statically. 
+ elems_flat = [ + ops.convert_to_tensor(elem, name="elem") for elem in nest.flatten(elems) + ] + n = elems_flat[0].shape[0].value or array_ops.shape(elems_flat[0])[0] + + elems_ta = nest.map_structure(create_ta, elems) if initializer is None: - a = elems_ta.read(0) + a = nest.map_structure(lambda elem: elem.read(0), elems_ta) i = constant_op.constant(1) else: - a = ops.convert_to_tensor(initializer) + a = initializer i = constant_op.constant(0) def compute(i, a): - a = fn(a, elems_ta.read(i)) + elem_i = nest.map_structure(lambda elem: elem.read(i), elems_ta) + a = fn(a, elem_i) return [i + 1, a] + _, r_a = control_flow_ops.while_loop( lambda i, a: i < n, compute, [i, a], parallel_iterations=parallel_iterations, @@ -135,6 +153,7 @@ def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, # supported in Eager if in_graph_mode and varscope_caching_device_was_none: varscope.set_caching_device(None) + return r_a @@ -153,10 +172,20 @@ def foldr(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, Suppose that `elems` is unpacked into `values`, a list of tensors. The shape of the result tensor is `fn(initializer, values[0]).shape`. + This method also allows multi-arity `elems` and output of `fn`. If `elems` + is a (possibly nested) list or tuple of tensors, then each of these tensors + must have a matching first (unpack) dimension. The signature of `fn` may + match the structure of `elems`. That is, if `elems` is + `(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is: + `fn = lambda (t1, [t2, t3, [t4, t5]]):`. + Args: fn: The callable to be performed. - elems: A tensor that is unpacked into a sequence of tensors to apply `fn`. - initializer: (optional) The initial value for the accumulator. + elems: A tensor or (possibly nested) sequence of tensors, each of which + will be unpacked along their first dimension. The nested sequence + of the resulting slices will be the first argument to `fn`. 
+ initializer: (optional) A tensor or (possibly nested) sequence of tensors, + as the initial value for the accumulator. parallel_iterations: (optional) The number of iterations allowed to run in parallel. back_prop: (optional) True enables support for back propagation. @@ -164,8 +193,9 @@ def foldr(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, name: (optional) Name prefix for the returned tensors. Returns: - A tensor resulting from applying `fn` consecutively to the list of tensors - unpacked from `elems`, from last to first. + A tensor or (possibly nested) sequence of tensors, resulting from applying + `fn` consecutively to the list of tensors unpacked from `elems`, from last + to first. Raises: TypeError: if `fn` is not callable. @@ -180,6 +210,11 @@ def foldr(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, if not callable(fn): raise TypeError("fn must be callable.") + def create_ta(elem): + return tensor_array_ops.TensorArray( + dtype=elem.dtype, size=n, dynamic_size=False, + infer_shape=True).unstack(elem) + in_graph_mode = not context.executing_eagerly() with ops.name_scope(name, "foldr", [elems]): # TODO(akshayka): Remove the in_graph_mode check once caching devices are @@ -195,26 +230,30 @@ def foldr(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, varscope.set_caching_device(lambda op: op.device) varscope_caching_device_was_none = True - # Convert elems to tensor array. - elems = ops.convert_to_tensor(elems, name="elems") - n = array_ops.shape(elems)[0] - elems_ta = tensor_array_ops.TensorArray(dtype=elems.dtype, size=n, - dynamic_size=False, - infer_shape=True) - elems_ta = elems_ta.unstack(elems) + # Convert elems to tensor array. n may be known statically. 
+ elems_flat = [ + ops.convert_to_tensor(elem, name="elem") for elem in nest.flatten(elems) + ] + n = elems_flat[0].shape[0].value or array_ops.shape(elems_flat[0])[0] + + elems_ta = nest.map_structure(create_ta, elems) if initializer is None: i = n - 1 - a = elems_ta.read(i) + a = nest.map_structure(lambda elem: elem.read(i), elems_ta) else: i = n - a = ops.convert_to_tensor(initializer) + a = initializer + def compute(i, a): i -= 1 - a = fn(a, elems_ta.read(i)) - return [i, a] + elem = nest.map_structure(lambda elem: elem.read(i), elems_ta) + a_out = fn(a, elem) + return [i, a_out] + _, r_a = control_flow_ops.while_loop( - lambda i, a: i > 0, compute, [i, a], + lambda i, a: i > 0, + compute, [i, a], parallel_iterations=parallel_iterations, back_prop=back_prop, swap_memory=swap_memory) @@ -223,6 +262,7 @@ def foldr(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, # supported in Eager if in_graph_mode and varscope_caching_device_was_none: varscope.set_caching_device(None) + return r_a -- GitLab From 01141932a9cdcd871310db141a66a47410c48ac0 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Mon, 23 Apr 2018 15:30:12 -0700 Subject: [PATCH 169/434] Support executing ops eagerly through XLA The ony real change is to add GpuDeviceInfo to XlaDevice. It is used by eager runtime to retrieve default device context. 
PiperOrigin-RevId: 193995586 --- tensorflow/compiler/jit/BUILD | 1 + tensorflow/compiler/jit/xla_device.cc | 40 +++++-- tensorflow/compiler/jit/xla_device.h | 8 ++ tensorflow/compiler/jit/xla_gpu_device.cc | 9 ++ tensorflow/compiler/tests/BUILD | 19 +++ tensorflow/compiler/tests/eager_test.py | 137 ++++++++++++++++++++++ 6 files changed, 206 insertions(+), 8 deletions(-) create mode 100644 tensorflow/compiler/tests/eager_test.py diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 50fa95c4f3..53b124cf89 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -180,6 +180,7 @@ cc_library( "//tensorflow/core/kernels:no_op", "//tensorflow/core/kernels:sendrecv_ops", "//tensorflow/core/kernels:variable_ops", + "@com_google_absl//absl/memory", ], ) diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index 12f471735f..2c2ac839b3 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include "absl/memory/memory.h" #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/xla_compile_on_demand_op.h" #include "tensorflow/compiler/jit/xla_device_context.h" @@ -181,9 +182,15 @@ XlaDevice::XlaDevice(const SessionOptions& options, jit_device_name_(jit_device_name), xla_allocator_(nullptr), platform_(platform), - transfer_as_literal_(transfer_as_literal) {} + transfer_as_literal_(transfer_as_literal) { + VLOG(1) << "Created XLA device " << jit_device_name; +} -XlaDevice::~XlaDevice() {} +XlaDevice::~XlaDevice() { + if (gpu_device_info_ != nullptr) { + gpu_device_info_->default_context->Unref(); + } +} xla::LocalClient* XlaDevice::client() const { // We lazily create the client because the platform commits to the @@ -191,9 +198,8 @@ xla::LocalClient* XlaDevice::client() const { // don't want to do it until we get a chance to hook the platform up // to a simulator. 
- // For now GetOrCreateLocalClient always returns success when passed - // a non-null platform. If that changes we may have to plumb in some - // way to pass Status back. + // TODO(b/78468222): This can fail, at least when the backend is GPU and + // there is no GPU on the host. return xla::ClientLibrary::GetOrCreateLocalClient(platform_).ValueOrDie(); } @@ -218,14 +224,32 @@ xla::StatusOr XlaDevice::GetStream() { return stream_.get(); } +Status XlaDevice::CreateAndSetGpuDeviceInfo() { + if (gpu_device_info_ == nullptr) { + TF_ASSIGN_OR_RETURN(se::Stream * stream, GetStream()); + // Call GetAllocator for the side-effect of ensuring the allocator + // is created. + GetAllocator({}); + // XlaDevice owns both gpu_device_info_ and + // gpu_device_info_->default_context. + gpu_device_info_ = absl::make_unique(); + gpu_device_info_->stream = stream; + gpu_device_info_->default_context = + new XlaDeviceContext(stream, client(), transfer_as_literal_); + gpu_device_info_->default_context->Ref(); + set_tensorflow_gpu_device_info(gpu_device_info_.get()); + } + + return Status::OK(); +} + Status XlaDevice::FillContextMap(const Graph* graph, DeviceContextMap* device_context_map) { VLOG(1) << "XlaDevice::FillContextMap"; device_context_map->resize(graph->num_node_ids()); TF_ASSIGN_OR_RETURN(se::Stream * stream, GetStream()); - // Call GetAllocator for the side-effect of ensuring the allocator and - // XlaTensorInfoManager is created. - (void)GetAllocator({}); + // Call GetAllocator for the side-effect of ensuring the allocator is created. 
+ GetAllocator({}); auto ctx = new XlaDeviceContext(stream, client(), transfer_as_literal_); for (Node* n : graph->nodes()) { VLOG(2) << n->id() << " : " << n->type_string() << " : " << n->name(); diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h index 4fe7dd8c9f..2f5c53aea8 100644 --- a/tensorflow/compiler/jit/xla_device.h +++ b/tensorflow/compiler/jit/xla_device.h @@ -105,6 +105,10 @@ class XlaDevice : public LocalDevice { xla::LocalClient* client() const; xla::StatusOr<::perftools::gputools::Stream*> GetStream(); + // If not already set, create and set GpuDeviceInfo. + // Not thread-safe + Status CreateAndSetGpuDeviceInfo(); + private: // The metadata of this XlaDevice. const Metadata xla_metadata_; @@ -123,6 +127,10 @@ class XlaDevice : public LocalDevice { // Must we use XLA's transfer manager for correct host<->device transfers? if // false, we can use ThenMemcpy() instead. bool transfer_as_literal_; + + // If set, holds default device context (that we must Unref) + // and its stream. 
+ std::unique_ptr gpu_device_info_; }; // Builds OpKernel registrations on 'device' for the JIT operators diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc index ac60423d95..a8afbf9dcd 100644 --- a/tensorflow/compiler/jit/xla_gpu_device.cc +++ b/tensorflow/compiler/jit/xla_gpu_device.cc @@ -54,6 +54,15 @@ Status XlaGpuDeviceFactory::CreateDevices(const SessionOptions& options, VLOG(1) << "Failed to create XLA_GPU device: " << status; return Status::OK(); } + + // TODO(b/78468222): Uncomment after fixing this bug + // status = device->CreateAndSetGpuDeviceInfo(); + // if (!status.ok()) { + // errors::AppendToMessage(&status, "while setting up ", DEVICE_GPU_XLA_JIT, + // " device"); + // return status; + // } + devices->push_back(device.release()); return Status::OK(); } diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 46b86c53aa..ac2441cea0 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -308,6 +308,25 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "eager_test", + size = "small", + srcs = ["eager_test.py"], + disabled_backends = [ + # TODO(b/78199195) Support XLA CPU devices in eager runtime + "cpu", + "cpu_ondemand", + # TODO(b/78468222) Enable GPU backend + "gpu", + ], + deps = [ + ":xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:platform_test", + ], +) + tf_xla_py_test( name = "fft_test", size = "medium", diff --git a/tensorflow/compiler/tests/eager_test.py b/tensorflow/compiler/tests/eager_test.py new file mode 100644 index 0000000000..bdd0185dfe --- /dev/null +++ b/tensorflow/compiler/tests/eager_test.py @@ -0,0 +1,137 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Test cases for eager execution using XLA.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.eager import backprop +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.platform import googletest + + +class EagerTest(XLATestCase): + + def testBasic(self): + with self.test_scope(): + three = constant_op.constant(3) + five = constant_op.constant(5) + product = three * five + self.assertAllEqual(15, product) + + def testExecuteListOutputLen0(self): + with self.test_scope(): + empty = constant_op.constant([], dtype=dtypes.int32) + result = array_ops.unstack(empty, 0) + self.assertTrue(isinstance(result, list)) + self.assertEqual(0, len(result)) + + def testExecuteListOutputLen1(self): + with self.test_scope(): + split_dim = constant_op.constant(1) + value = constant_op.constant([[0, 1, 2], [3, 4, 5]]) + result = array_ops.split(value, 1, axis=split_dim) + self.assertTrue(isinstance(result, list)) + self.assertEqual(1, len(result)) + self.assertAllEqual([[0, 1, 2], [3, 4, 5]], result[0]) + + def 
testExecuteListOutputLen3(self): + with self.test_scope(): + split_dim = constant_op.constant(1) + value = constant_op.constant([[0, 1, 2], [3, 4, 5]]) + result = array_ops.split(value, 3, axis=split_dim) + self.assertTrue(isinstance(result, list)) + self.assertEqual(3, len(result)) + self.assertAllEqual([[0], [3]], result[0]) + self.assertAllEqual([[1], [4]], result[1]) + self.assertAllEqual([[2], [5]], result[2]) + + def testBasicGraph(self): + # Run some ops eagerly + with self.test_scope(): + three = constant_op.constant(3) + five = constant_op.constant(5) + product = three * five + self.assertAllEqual(15, product) + + # Run some ops graphly + with context.graph_mode(), self.test_session() as sess: + with self.test_scope(): + three = constant_op.constant(3) + five = constant_op.constant(5) + product = three * five + self.assertAllEqual(15, sess.run(product)) + + def testDegenerateSlices(self): + with self.test_scope(): + npt = np.arange(1, 19, dtype=np.float32).reshape(3, 2, 3) + t = constant_op.constant(npt) + # degenerate by offering a forward interval with a negative stride + self.assertAllEqual(npt[0:-1:-1, :, :], t[0:-1:-1, :, :]) + # degenerate with a reverse interval with a positive stride + self.assertAllEqual(npt[-1:0, :, :], t[-1:0, :, :]) + # empty interval in every dimension + self.assertAllEqual(npt[-1:0, 2:2, 2:3:-1], t[-1:0, 2:2, 2:3:-1]) + + def testIdentity(self): + with self.test_scope(): + self.assertAllEqual(2, array_ops.identity(2)) + + def testIdentityOnVariable(self): + with self.test_scope(): + v = resource_variable_ops.ResourceVariable(True) + i = array_ops.identity(v) + self.assertAllEqual(True, i.numpy()) + + def testAssignAddVariable(self): + with self.test_scope(): + v = resource_variable_ops.ResourceVariable(1.0) + v.assign_add(2.0) + self.assertEqual(3.0, v.numpy()) + + def testGradient(self): + def f(x): + return x + + with self.test_scope(): + grad_fn = backprop.gradients_function(f) + self.assertAllEqual(2., grad_fn(1., 
dy=2.)[0]) + + def testVariableGradient(self): + with self.test_scope(): + v0 = resource_variable_ops.ResourceVariable(1.0) + + def f(): + x = v0 * v0 + return x + + grads = backprop.implicit_grad(f)() + self.assertEqual(2., grads[0][0].numpy()) + + +if __name__ == "__main__": + ops.enable_eager_execution( + config=config_pb2.ConfigProto(log_device_placement=True)) + googletest.main() -- GitLab From 2f2d4745836fdcf4bf365644017a900d98bd6206 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 23 Apr 2018 15:43:20 -0700 Subject: [PATCH 170/434] Not using a control flow context when building eager functions. PiperOrigin-RevId: 193997756 --- tensorflow/python/eager/function.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 0f1170bb42..b924448abe 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -405,7 +405,15 @@ class GraphModeFunction(object): c_known_ops = set() c_captured_tensors = set() - def add_op_internal(op): + existing_op_len = len(self._graph.get_operations()) + filtered_outputs = [x for x in self._returns if x is not None] + self._out_grad_placeholders = [ + graph_placeholder(x.dtype, x.shape) for x in filtered_outputs] + in_gradients = gradients_impl.gradients( + filtered_outputs, + self._input_placeholders, + grad_ys=self._out_grad_placeholders) + for op in self._graph.get_operations()[existing_op_len:]: if op.type in ["Variable", "VariableV2", "VarHandleOp"]: raise ValueError("tfe.defun cannot capture variables created without " "using tf.get_variable. 
Op: %s" % op) @@ -414,17 +422,6 @@ class GraphModeFunction(object): if i.op not in c_known_ops: c_captured_tensors.add(i) - c = HelperContext(add_op_internal) - - with c: - filtered_outputs = [x for x in self._returns if x is not None] - self._out_grad_placeholders = [ - graph_placeholder(x.dtype, x.shape) for x in filtered_outputs] - in_gradients = gradients_impl.gradients( - filtered_outputs, - self._input_placeholders, - grad_ys=self._out_grad_placeholders) - backward_outputs = tuple( grad for grad in _flatten(in_gradients) if grad is not None) output_shapes = tuple(grad.shape for grad in backward_outputs) -- GitLab From c8a1eeb98ca394d0330bead37b446bce998bb3d5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 15:50:56 -0700 Subject: [PATCH 171/434] [XLA] Redesign: migrate convolution tests. PiperOrigin-RevId: 193998684 --- tensorflow/compiler/xla/BUILD | 2 +- tensorflow/compiler/xla/reference_util.cc | 6 +- .../convolution_dimension_numbers_test.cc | 38 +++- .../xla/tests/convolution_variants_test.cc | 167 +++++++++--------- 4 files changed, 116 insertions(+), 97 deletions(-) diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 88f37433a5..1af9cb6d2a 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -605,8 +605,8 @@ cc_library( ":util", ":window_util", ":xla_data_proto", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:padding", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_evaluator", "//tensorflow/compiler/xla/service:shape_inference", diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc index ad3a28e119..df9dbc5830 100644 --- a/tensorflow/compiler/xla/reference_util.cc +++ b/tensorflow/compiler/xla/reference_util.cc @@ -18,7 +18,7 @@ limitations under the License. 
#include #include -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h" #include "tensorflow/compiler/xla/service/hlo_evaluator.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -90,7 +90,7 @@ std::unique_ptr> MatmulArray2DImpl( Padding padding) { return ConvArray3DGeneralDimensionsDilated( lhs, rhs, kernel_stride, padding, 1, 1, - ComputationBuilder::CreateDefaultConvDimensionNumbers(1)); + XlaBuilder::CreateDefaultConvDimensionNumbers(1)); } /*static*/ std::unique_ptr> @@ -140,7 +140,7 @@ ReferenceUtil::ConvArray3DGeneralDimensionsDilated( std::pair kernel_stride, Padding padding) { return ConvArray4DGeneralDimensions( lhs, rhs, kernel_stride, padding, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); } /* static */ std::unique_ptr> diff --git a/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc b/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc index 896b34fb6e..b5a42e3059 100644 --- a/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_dimension_numbers_test.cc @@ -18,9 +18,9 @@ limitations under the License. #include #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/statusor.h" @@ -34,13 +34,35 @@ limitations under the License. 
namespace xla { namespace { +StatusOr CreateConvDimensionNumbers( + int64 input_batch, int64 input_feature, int64 input_first_spatial, + int64 input_second_spatial, int64 output_batch, int64 output_feature, + int64 output_first_spatial, int64 output_second_spatial, + int64 kernel_output_feature, int64 kernel_input_feature, + int64 kernel_first_spatial, int64 kernel_second_spatial) { + ConvolutionDimensionNumbers dimension_numbers; + dimension_numbers.set_input_batch_dimension(input_batch); + dimension_numbers.set_input_feature_dimension(input_feature); + dimension_numbers.add_input_spatial_dimensions(input_first_spatial); + dimension_numbers.add_input_spatial_dimensions(input_second_spatial); + dimension_numbers.set_kernel_output_feature_dimension(kernel_output_feature); + dimension_numbers.set_kernel_input_feature_dimension(kernel_input_feature); + dimension_numbers.add_kernel_spatial_dimensions(kernel_first_spatial); + dimension_numbers.add_kernel_spatial_dimensions(kernel_second_spatial); + dimension_numbers.set_output_batch_dimension(output_batch); + dimension_numbers.set_output_feature_dimension(output_feature); + dimension_numbers.add_output_spatial_dimensions(output_first_spatial); + dimension_numbers.add_output_spatial_dimensions(output_second_spatial); + TF_RETURN_IF_ERROR(XlaBuilder::Validate(dimension_numbers)); + return dimension_numbers; +} + class ConvolutionDimensionNumbersTest : public ClientLibraryTestBase {}; // Tests the convolution operation with invalid input dimension numbers. 
TEST_F(ConvolutionDimensionNumbersTest, InvalidInputDimensionNumbers) { auto dimension_numbers_status = - ComputationBuilder::CreateConvDimensionNumbers(0, 2, 2, 3, 0, 1, 2, 3, 0, - 1, 2, 3); + CreateConvDimensionNumbers(0, 2, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3); ASSERT_FALSE(dimension_numbers_status.ok()); ASSERT_THAT(dimension_numbers_status.status().error_message(), ::testing::HasSubstr("input are not unique")); @@ -49,8 +71,7 @@ TEST_F(ConvolutionDimensionNumbersTest, InvalidInputDimensionNumbers) { // Tests the convolution operation with invalid weight dimension numbers. TEST_F(ConvolutionDimensionNumbersTest, InvalidWeightDimensionNumbers) { auto dimension_numbers_status = - ComputationBuilder::CreateConvDimensionNumbers(0, 1, 2, 3, 0, 1, 2, 3, 0, - 2, 2, 3); + CreateConvDimensionNumbers(0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 2, 3); ASSERT_FALSE(dimension_numbers_status.ok()); ASSERT_THAT(dimension_numbers_status.status().error_message(), ::testing::HasSubstr("weight are not unique")); @@ -59,8 +80,7 @@ TEST_F(ConvolutionDimensionNumbersTest, InvalidWeightDimensionNumbers) { // Tests the convolution operation with invalid output dimension numbers. 
TEST_F(ConvolutionDimensionNumbersTest, InvalidOutputDimensionNumbers) { auto dimension_numbers_status = - ComputationBuilder::CreateConvDimensionNumbers(0, 1, 2, 3, 0, 2, 2, 3, 0, - 1, 2, 3); + CreateConvDimensionNumbers(0, 1, 2, 3, 0, 2, 2, 3, 0, 1, 2, 3); ASSERT_FALSE(dimension_numbers_status.ok()); ASSERT_THAT(dimension_numbers_status.status().error_message(), ::testing::HasSubstr("output are not unique")); @@ -76,14 +96,14 @@ XLA_TEST_F(ConvolutionDimensionNumbersTest, client_->TransferToServer(*Literal::CreateR4FromArray4D(*weight_array)) .ConsumeValueOrDie(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input = builder.ConstantR4FromArray4D(*input_array); auto weight = builder.Parameter(0, ShapeUtil::MakeShape(F32, {4, 3, 1, 1}), "weight"); auto conv1 = builder.Conv(input, weight, {1, 1}, Padding::kValid); ConvolutionDimensionNumbers dim_nums = - ComputationBuilder::CreateDefaultConvDimensionNumbers(); + XlaBuilder::CreateDefaultConvDimensionNumbers(); // Swap batch_dimension and feature_dimension. int64 old_input_batch_dim = dim_nums.input_batch_dimension(); int64 old_output_batch_dim = dim_nums.output_batch_dimension(); diff --git a/tensorflow/compiler/xla/tests/convolution_variants_test.cc b/tensorflow/compiler/xla/tests/convolution_variants_test.cc index 9c1145def8..50d6e25d86 100644 --- a/tensorflow/compiler/xla/tests/convolution_variants_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_variants_test.cc @@ -28,6 +28,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" @@ -52,7 +53,7 @@ class ConvolutionVariantsTest : public ClientLibraryTestBase { }; XLA_TEST_F(ConvolutionVariantsTest, Minimal) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); const Array4D input_array(1, 1, 1, 1, {2}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -67,7 +68,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Minimal) { } XLA_TEST_F(ConvolutionVariantsTest, MinimalWithBatch) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); const Array4D input_array(5, 1, 1, 1, {1, 2, 3, 4, 5}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -82,7 +83,7 @@ XLA_TEST_F(ConvolutionVariantsTest, MinimalWithBatch) { } XLA_TEST_F(ConvolutionVariantsTest, Flat1x1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(2, 1, 3, 4); input_array.FillWithMultiples(1); @@ -99,7 +100,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Flat1x1) { } XLA_TEST_F(ConvolutionVariantsTest, Deep1x1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 2, 1, 1, {10, 1}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -114,7 +115,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Deep1x1) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x2in1x2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 1, 2, {1, 2}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -129,7 +130,7 @@ XLA_TEST_F(ConvolutionVariantsTest, 
Filter1x2in1x2) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x2in1x3) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 1, 3, {1, 2, 3}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -144,7 +145,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x2in1x3) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x2in2x2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 2, 2, {1, 2, 3, 4}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -159,7 +160,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x2in2x2) { } XLA_TEST_F(ConvolutionVariantsTest, Filter2x1in2x2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 2, 2, {1, 2, 3, 4}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -174,7 +175,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter2x1in2x2) { } XLA_TEST_F(ConvolutionVariantsTest, Filter2x2in2x2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 2, 2, {1, 2, 3, 4}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -189,7 +190,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter2x2in2x2) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x2in2x3WithDepthAndBatch) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array( 2, 2, 2, 3, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, // plane 0 @@ -210,7 +211,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x2in2x3WithDepthAndBatch) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x1stride1x2in1x4) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 1, 4, {1, 2, 3, 4}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -225,7 +226,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1stride1x2in1x4) { } XLA_TEST_F(ConvolutionVariantsTest, 
Filter1x1stride1x2in1x5) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 1, 5, {1, 2, 3, 4, 5}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -240,7 +241,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1stride1x2in1x5) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x3stride1x2in1x4) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 1, 4, {1, 2, 3, 4}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -255,7 +256,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x3stride1x2in1x4) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x3stride1x2in1x5) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 1, 5, {1, 2, 3, 4, 5}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -270,7 +271,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x3stride1x2in1x5) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x1stride2x2in3x3) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 3, 3, {1, 2, 3, 4, 5, 6, 7, 8, 9}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -285,7 +286,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1stride2x2in3x3) { } XLA_TEST_F(ConvolutionVariantsTest, Filter3x1in1x1Padded) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 1, 1, {1}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -300,7 +301,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter3x1in1x1Padded) { } XLA_TEST_F(ConvolutionVariantsTest, Filter5x1in3x1Padded) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 1, 3, {1, 2, 3}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -315,7 +316,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter5x1in3x1Padded) { } 
XLA_TEST_F(ConvolutionVariantsTest, Filter3x3in2x2Padded) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 2, 2, {1, 2, 3, 4}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -333,7 +334,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter3x3in2x2Padded) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x1in2x1WithPaddingAndDepth) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 2, 1, 2, {1, 2, 3, 4}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -348,7 +349,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1in2x1WithPaddingAndDepth) { } XLA_TEST_F(ConvolutionVariantsTest, Filter2x2Stride1x1Input3x3) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 3, 3, {1, 2, 3, 4, 5, 6, 7, 8, 9}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -363,7 +364,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter2x2Stride1x1Input3x3) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x2Stride1x1Input1x3) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(1, 1, 1, 3, {1, 2, 3}); auto input = builder.ConstantR4FromArray4D(input_array); @@ -378,7 +379,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x2Stride1x1Input1x3) { } XLA_TEST_F(ConvolutionVariantsTest, Filter2x1x8x8Input1x1x8x8) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(64); std::iota(input_data.begin(), input_data.end(), 0.0); @@ -398,7 +399,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter2x1x8x8Input1x1x8x8) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input16x1x1x1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(16 * 1 * 1 * 1); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -419,7 +420,7 @@ 
XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input16x1x1x1) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x2Input16x1x2x2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); constexpr int bs = 16; constexpr int kx = 2; @@ -450,7 +451,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x2Input16x1x2x2) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x2Input3x1x2x2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); constexpr int kx = 2; constexpr int ky = 2; @@ -482,7 +483,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x2Input3x1x2x2) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x8x8Input16x1x8x8) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(16, 1, 8, 8); for (int i0 = 0; i0 < 16; ++i0) { @@ -510,7 +511,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x8x8Input16x1x8x8) { } XLA_TEST_F(ConvolutionVariantsTest, Filter2x2x8x8Input1x2x8x8) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(2 * 8 * 8); std::iota(input_data.begin(), input_data.end(), 0.0); @@ -536,7 +537,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter2x2x8x8Input1x2x8x8) { } XLA_TEST_F(ConvolutionVariantsTest, Filter2x2x8x8Input2x2x8x8) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(2 * 2 * 8 * 8); std::iota(input_data.begin(), input_data.end(), 0.0); @@ -562,7 +563,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter2x2x8x8Input2x2x8x8) { } XLA_TEST_F(ConvolutionVariantsTest, Filter2x2x8x8Input32x2x8x8) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(32 * 2 * 8 * 8); std::iota(input_data.begin(), input_data.end(), 0.0); @@ -602,7 +603,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter2x2x8x8Input32x2x8x8) { } XLA_TEST_F(ConvolutionVariantsTest, Filter16x16x1x1Input16x16x1x1) { - 
ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D input_array(16, 16, 1, 1); Array4D filter_array(16, 16, 1, 1); @@ -628,7 +629,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter16x16x1x1Input16x16x1x1) { } XLA_TEST_F(ConvolutionVariantsTest, FlatRhsDilation) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 1 * 4 * 6); std::iota(input_data.begin(), input_data.end(), 0.0); @@ -640,14 +641,14 @@ XLA_TEST_F(ConvolutionVariantsTest, FlatRhsDilation) { builder.ConvGeneralDilated( /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, /*padding=*/{}, /*lhs_dilation=*/{}, /*rhs_dilation=*/{2, 2}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); Array4D expected(1, 1, 2, 2, {3924, 4257, 5922, 6255}); ComputeAndCompareR4(&builder, expected, {}, error_spec_); } XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation1D) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 1 * 1 * 5); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -659,14 +660,14 @@ XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation1D) { builder.ConvGeneralDilated( /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, /*padding=*/{}, /*lhs_dilation=*/{1, 2}, /*rhs_dilation=*/{}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); Array4D expected(1, 1, 1, 8, {10, 2, 20, 3, 30, 4, 40, 5}); ComputeAndCompareR4(&builder, expected, {}, error_spec_); } XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 1 * 3 * 4); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -682,8 +683,7 @@ XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation) { builder.ConvGeneralDilated( /*lhs=*/input, /*rhs=*/filter, 
/*window_strides=*/{2, 1}, /*padding=*/{{1, 0}, {0, 0}}, /*lhs_dilation=*/{3, 2}, - /*rhs_dilation=*/{}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + /*rhs_dilation=*/{}, XlaBuilder::CreateDefaultConvDimensionNumbers()); Array4D expected(1, 1, 3, 5, {204, 40, 406, 60, 608, // @@ -693,7 +693,7 @@ XLA_TEST_F(ConvolutionVariantsTest, FlatLhsDilation) { } XLA_TEST_F(ConvolutionVariantsTest, NegativePaddingOnBothEnds) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 1 * 1 * 5); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -705,14 +705,14 @@ XLA_TEST_F(ConvolutionVariantsTest, NegativePaddingOnBothEnds) { builder.ConvGeneral( /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, /*padding=*/{{0, 0}, {-1, -1}}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); Array4D expected(1, 1, 1, 2, {23, 34}); ComputeAndCompareR4(&builder, expected, {}, error_spec_); } XLA_TEST_F(ConvolutionVariantsTest, NegativePaddingLowAndPositivePaddingHigh) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 1 * 1 * 5); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -724,14 +724,14 @@ XLA_TEST_F(ConvolutionVariantsTest, NegativePaddingLowAndPositivePaddingHigh) { builder.ConvGeneral( /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, /*padding=*/{{0, 0}, {-1, 2}}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); Array4D expected(1, 1, 1, 5, {23, 34, 45, 50, 0}); ComputeAndCompareR4(&builder, expected, {}, error_spec_); } XLA_TEST_F(ConvolutionVariantsTest, PositivePaddingLowAndNegativePaddingHigh) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 1 * 1 * 5); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -743,14 +743,14 @@ 
XLA_TEST_F(ConvolutionVariantsTest, PositivePaddingLowAndNegativePaddingHigh) { builder.ConvGeneral( /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, /*padding=*/{{0, 0}, {2, -1}}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); Array4D expected(1, 1, 1, 5, {0, 1, 12, 23, 34}); ComputeAndCompareR4(&builder, expected, {}, error_spec_); } XLA_TEST_F(ConvolutionVariantsTest, PositivePaddingAndDilation) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 1 * 1 * 5); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -763,7 +763,7 @@ XLA_TEST_F(ConvolutionVariantsTest, PositivePaddingAndDilation) { /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, /*padding=*/{{0, 0}, {3, 2}}, /*lhs_dilation=*/{1, 2}, /*rhs_dilation=*/{1, 2}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); // input: // [1, 2, 3, 4, 5] --dilate-> [1, 0, 2, 0, 3, 0, 4, 0, 5] @@ -775,7 +775,7 @@ XLA_TEST_F(ConvolutionVariantsTest, PositivePaddingAndDilation) { ComputeAndCompareR4(&builder, expected, {}, error_spec_); } XLA_TEST_F(ConvolutionVariantsTest, NegativePaddingAndDilation) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 1 * 1 * 5); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -788,7 +788,7 @@ XLA_TEST_F(ConvolutionVariantsTest, NegativePaddingAndDilation) { /*lhs=*/input, /*rhs=*/filter, /*window_strides=*/{}, /*padding=*/{{0, 0}, {-3, -2}}, /*lhs_dilation=*/{1, 2}, /*rhs_dilation=*/{1, 2}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); // input: // [1, 2, 3, 4, 5] --dilate-> [1, 0, 2, 0, 3, 0, 4, 0, 5] @@ -821,7 +821,7 @@ XLA_TEST_F(ConvolutionVariantsTest, RandomData_Input1x1x2x3_Filter2x1x1x2) { Array4D input_array(bs, iz, iy, ix, input_data); Array4D 
filter_array(oz, iz, ky, kx, kernel_data); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input = builder.ConstantR4FromArray4D(input_array); auto filter = builder.ConstantR4FromArray4D(filter_array); builder.Conv(input, filter, {1, 1}, Padding::kValid); @@ -854,7 +854,7 @@ XLA_TEST_F(ConvolutionVariantsTest, RandomData_Input1x16x1x1_Filter1x16x1x1) { Array4D input_array(bs, iz, iy, ix, input_data); Array4D filter_array(oz, iz, ky, kx, kernel_data); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input = builder.ConstantR4FromArray4D(input_array); auto filter = builder.ConstantR4FromArray4D(filter_array); builder.Conv(input, filter, {1, 1}, Padding::kValid); @@ -887,7 +887,7 @@ XLA_TEST_F(ConvolutionVariantsTest, RandomData_Input16x16x1x1_Filter1x16x1x1) { Array4D input_array(bs, iz, iy, ix, input_data); Array4D filter_array(oz, iz, ky, kx, kernel_data); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input = builder.ConstantR4FromArray4D(input_array); auto filter = builder.ConstantR4FromArray4D(filter_array); builder.Conv(input, filter, {1, 1}, Padding::kValid); @@ -920,7 +920,7 @@ XLA_TEST_F(ConvolutionVariantsTest, RandomData_Input16x16x1x1_Filter16x16x1x1) { Array4D input_array(bs, iz, iy, ix, input_data); Array4D filter_array(oz, iz, ky, kx, kernel_data); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input = builder.ConstantR4FromArray4D(input_array); auto filter = builder.ConstantR4FromArray4D(filter_array); builder.Conv(input, filter, {1, 1}, Padding::kValid); @@ -954,7 +954,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Array4D input_array(bs, iz, iy, ix, input_data); Array4D filter_array(oz, iz, ky, kx, kernel_data); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input = builder.ConstantR4FromArray4D(input_array); auto filter = 
builder.ConstantR4FromArray4D(filter_array); builder.Conv(input, filter, {1, 1}, Padding::kValid); @@ -966,7 +966,7 @@ XLA_TEST_F(ConvolutionVariantsTest, } XLA_TEST_F(ConvolutionVariantsTest, Filter1x2x1x1Input1x2x3x1GeneralPadding) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 2 * 3 * 1); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -1010,7 +1010,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x2x1x1Input1x2x3x1GeneralPadding) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input1x2x3x1GeneralPadding) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 2 * 3 * 1); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -1054,7 +1054,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input1x2x3x1GeneralPadding) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input1x2x3x1NoPadding) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 2 * 3 * 1); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -1095,7 +1095,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x1x1Input1x2x3x1NoPadding) { } XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x3Input1x2x3x2NoPadding) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector input_data(1 * 2 * 3 * 2); std::iota(input_data.begin(), input_data.end(), 1.0); @@ -1147,7 +1147,7 @@ XLA_TEST_F(ConvolutionVariantsTest, Filter1x1x2x3Input1x2x3x2NoPadding) { // BackwardInputConv([1,2,3], [5,6], padding_low=0, padding_high=1) XLA_TEST_F(ConvolutionVariantsTest, BackwardInputLowPaddingLessThanHighPadding) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto gradients = builder.ConstantR4FromArray4D( Array4D(1, 1, 1, 3, /*values=*/{1, 2, 3})); @@ -1166,19 +1166,18 @@ XLA_TEST_F(ConvolutionVariantsTest, // BackwardInputConv([1], [1,10,100], 
stride=3, padding=(2,1)) XLA_TEST_F(ConvolutionVariantsTest, BackwardInputLowPaddingGreaterThanHighPadding) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto gradients = builder.ConstantR4FromArray4D( Array4D(1, 1, 1, 1, /*values=*/{1})); auto weights = builder.ConstantR4FromArray4D( Array4D(1, 1, 1, 3, /*values=*/{1, 10, 100})); auto mirrored_weights = builder.Rev(weights, {2, 3}); - builder.ConvGeneralDilated( - gradients, mirrored_weights, - /*window_strides=*/{1, 1}, - /*padding=*/{{0, 0}, {0, 3}}, - /*lhs_dilation=*/{1, 3}, /*rhs_dilation=*/{}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + builder.ConvGeneralDilated(gradients, mirrored_weights, + /*window_strides=*/{1, 1}, + /*padding=*/{{0, 0}, {0, 3}}, + /*lhs_dilation=*/{1, 3}, /*rhs_dilation=*/{}, + XlaBuilder::CreateDefaultConvDimensionNumbers()); ComputeAndCompareR4(&builder, {{{{100, 0}}}}, {}, error_spec_); } @@ -1187,7 +1186,7 @@ XLA_TEST_F(ConvolutionVariantsTest, // into // BackwardInputConv([1], [1,10,100], padding=(1,1)) XLA_TEST_F(ConvolutionVariantsTest, BackwardInputEvenPadding) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto gradients = builder.ConstantR4FromArray4D( Array4D(1, 1, 1, 1, /*values=*/{1})); @@ -1208,7 +1207,7 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardInputEvenPadding) { // However, XLA:GPU doesn't actually fuse it because PadInsertion doesn't // support negative padding on backward convolution yet (b/32744257). 
XLA_TEST_F(ConvolutionVariantsTest, BackwardInputWithNegativePaddingHigh) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto gradients = builder.ConstantR4FromArray4D( Array4D(1, 1, 1, 3, /*values=*/{1, 2, 3})); @@ -1224,7 +1223,7 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardInputWithNegativePaddingHigh) { XLA_TEST_F(ConvolutionVariantsTest, BackwardFilterLowPaddingLessThanHighPadding) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // activations: 1,2,3,4 ---pad--> 0,1,2,3,4,0,0 // gradients: 100,10,1 -dilate-> 100,0,10,0,1 @@ -1240,7 +1239,7 @@ XLA_TEST_F(ConvolutionVariantsTest, /*window_strides=*/{1, 1}, /*padding=*/{{0, 0}, {1, 2}}, /*lhs_dilation=*/{}, /*rhs_dilation=*/{1, 2}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); builder.Transpose(forward_conv, {0, 1, 2, 3}); ComputeAndCompareR4(&builder, {{{{24, 130, 240}}}}, {}, error_spec_); @@ -1248,7 +1247,7 @@ XLA_TEST_F(ConvolutionVariantsTest, XLA_TEST_F(ConvolutionVariantsTest, BackwardFilterLowPaddingGreaterThanHighPadding) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // activations: 1,2,3,4 ---pad--> 0,0,1,2,3,4 // gradients: 100,10,1 -dilate-> 100,0,10,0,1 @@ -1266,14 +1265,14 @@ XLA_TEST_F(ConvolutionVariantsTest, /*window_strides=*/{1, 1}, /*padding=*/{{0, 0}, {2, 0}}, /*lhs_dilation=*/{}, /*rhs_dilation=*/{1, 2}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); builder.Transpose(forward_conv, {0, 1, 2, 3}); ComputeAndCompareR4(&builder, {{{{13, 24}}}}, {}, error_spec_); } XLA_TEST_F(ConvolutionVariantsTest, BackwardFilterEvenPadding) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // activations: 1,2,3,4 ---pad--> 0,0,1,2,3,4,0 // gradients: 100,10,1 -dilate-> 100,0,10,0,1 @@ -1293,14 +1292,14 @@ 
XLA_TEST_F(ConvolutionVariantsTest, BackwardFilterEvenPadding) { /*window_strides=*/{1, 1}, /*padding=*/{{0, 0}, {2, 1}}, /*lhs_dilation=*/{}, /*rhs_dilation=*/{1, 2}, - ComputationBuilder::CreateDefaultConvDimensionNumbers()); + XlaBuilder::CreateDefaultConvDimensionNumbers()); builder.Transpose(forward_conv, {0, 1, 2, 3}); ComputeAndCompareR4(&builder, {{{{13, 24, 130}}}}, {}, error_spec_); } XLA_TEST_F(ConvolutionVariantsTest, BackwardInputEvenPadding1D) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto gradients = builder.ConstantR3FromArray3D( Array3D(1, 1, 1, /*value=*/1)); @@ -1314,26 +1313,26 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardInputEvenPadding1D) { } XLA_TEST_F(ConvolutionVariantsTest, BackwardFilterEvenPadding1D) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto activations = builder.ConstantR3FromArray3D(Array3D({{{1, 2, 3, 4}}})); auto gradients = builder.ConstantR3FromArray3D(Array3D({{{100, 10, 1}}})); - auto forward_conv = builder.ConvGeneralDilated( - activations, gradients, - /*window_strides=*/{1}, - /*padding=*/{{2, 1}}, - /*lhs_dilation=*/{}, /*rhs_dilation=*/{2}, - ComputationBuilder::CreateDefaultConvDimensionNumbers( - /*num_spatial_dims=*/1)); + auto forward_conv = + builder.ConvGeneralDilated(activations, gradients, + /*window_strides=*/{1}, + /*padding=*/{{2, 1}}, + /*lhs_dilation=*/{}, /*rhs_dilation=*/{2}, + XlaBuilder::CreateDefaultConvDimensionNumbers( + /*num_spatial_dims=*/1)); builder.Transpose(forward_conv, {0, 1, 2}); ComputeAndCompareR3(&builder, {{{13, 24, 130}}}, {}, error_spec_); } XLA_TEST_F(ConvolutionVariantsTest, BackwardInputEvenPadding3D) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto gradients_flat = Literal::CreateR1({1}); auto gradients_literal = @@ -1357,7 +1356,7 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardInputEvenPadding3D) { } XLA_TEST_F(ConvolutionVariantsTest, 
BackwardFilterEvenPadding3D) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto activations_flat = Literal::CreateR1({1, 2, 3, 4}); auto activations_literal = @@ -1378,7 +1377,7 @@ XLA_TEST_F(ConvolutionVariantsTest, BackwardFilterEvenPadding3D) { /*window_strides=*/{1, 1, 1}, /*padding=*/{{0, 0}, {0, 0}, {2, 1}}, /*lhs_dilation=*/{}, /*rhs_dilation=*/{1, 1, 2}, - ComputationBuilder::CreateDefaultConvDimensionNumbers( + XlaBuilder::CreateDefaultConvDimensionNumbers( /*num_spatial_dims=*/3)); builder.Transpose(forward_conv, {0, 1, 2, 3, 4}); ComputeAndCompareLiteral(&builder, *expected_literal, {}, error_spec_); -- GitLab From bb4a80c92105426ccf20a98c4291a1a3f8499b54 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 15:56:12 -0700 Subject: [PATCH 172/434] Implement exporting the keys/values in a hash table. PiperOrigin-RevId: 193999421 --- tensorflow/contrib/lookup/lookup_ops_test.py | 6 +++++ .../core/kernels/initializable_lookup_table.h | 2 +- tensorflow/core/kernels/lookup_table_op.h | 24 +++++++++++++++++++ tensorflow/python/ops/lookup_ops.py | 20 ++++++++++++++++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py index f681b7b132..5d4682ec9f 100644 --- a/tensorflow/contrib/lookup/lookup_ops_test.py +++ b/tensorflow/contrib/lookup/lookup_ops_test.py @@ -58,6 +58,12 @@ class HashTableOpTest(test.TestCase): result = output.eval() self.assertAllEqual([0, 1, -1], result) + exported_keys_tensor, exported_values_tensor = table.export() + + self.assertItemsEqual([b"brain", b"salad", b"surgery"], + exported_keys_tensor.eval()) + self.assertItemsEqual([0, 1, 2], exported_values_tensor.eval()) + def testHashTableFindHighRank(self): with self.test_session(): default_val = -1 diff --git a/tensorflow/core/kernels/initializable_lookup_table.h b/tensorflow/core/kernels/initializable_lookup_table.h index 
edb779540f..990cbceac2 100644 --- a/tensorflow/core/kernels/initializable_lookup_table.h +++ b/tensorflow/core/kernels/initializable_lookup_table.h @@ -51,7 +51,7 @@ class InitializableLookupTable : public LookupInterface { "Insert not supported by InitializableLookupTable implementations"); } - Status ExportValues(OpKernelContext* context) final { + Status ExportValues(OpKernelContext* context) { return errors::Unimplemented( "ExportValues not supported by InitializableLookupTable " "implementations"); diff --git a/tensorflow/core/kernels/lookup_table_op.h b/tensorflow/core/kernels/lookup_table_op.h index 29a0cc91fe..3977f16299 100644 --- a/tensorflow/core/kernels/lookup_table_op.h +++ b/tensorflow/core/kernels/lookup_table_op.h @@ -177,6 +177,30 @@ class HashTable : public InitializableLookupTable { return table_ ? table_->size() : 0; } + Status ExportValues(OpKernelContext* context) override { + if (!is_initialized_) { + return errors::Aborted("HashTable is not initialized."); + } + + const int64 size = table_->size(); + + Tensor* keys; + Tensor* values; + TF_RETURN_IF_ERROR( + context->allocate_output("keys", TensorShape({size}), &keys)); + TF_RETURN_IF_ERROR( + context->allocate_output("values", TensorShape({size}), &values)); + + auto keys_data = keys->flat(); + auto values_data = values->flat(); + int64 i = 0; + for (auto it = table_->begin(); it != table_->end(); ++it, ++i) { + keys_data(i) = it->first; + values_data(i) = it->second; + } + return Status::OK(); + } + DataType key_dtype() const override { return DataTypeToEnum::v(); } DataType value_dtype() const override { return DataTypeToEnum::v(); } diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index 6f043f60e6..0e547689cc 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -277,7 +277,27 @@ class HashTable(InitializableLookupTableBase): name=scope) super(HashTable, self).__init__(table_ref, default_value, initializer) + 
self._value_shape = self._default_value.get_shape() + def export(self, name=None): + """Returns tensors of all keys and values in the table. + + Args: + name: A name for the operation (optional). + + Returns: + A pair of tensors with the first tensor containing all keys and the + second tensors containing all values in the table. + """ + with ops.name_scope(name, "%s_Export" % self._name, + [self._table_ref]) as name: + with ops.colocate_with(self._table_ref): + exported_keys, exported_values = gen_lookup_ops.lookup_table_export_v2( + self._table_ref, self._key_dtype, self._value_dtype, name=name) + + exported_values.set_shape(exported_keys.get_shape().concatenate( + self._value_shape)) + return exported_keys, exported_values class TableInitializerBase(object): """Base class for lookup table initializers.""" -- GitLab From ff15c81e2b92ef8fb47bb15790cffd18377a4ef2 Mon Sep 17 00:00:00 2001 From: Andrew Cotter Date: Mon, 23 Apr 2018 15:57:02 -0700 Subject: [PATCH 173/434] This is a library for performing constrained optimization. It defines two interfaces: ConstrainedMinimizationProblem, which specifies a constrained optimization problem, and ConstrainedOptimizer, which is slightly different from a tf.train.Optimizer, mostly due to the fact that it is meant to optimize ConstrainedMinimizationProblems. In addition to these two interfaces, three ConstrainedOptimizer implementations are included, as well as helper functions which, given a set of candidate solutions, heuristically find the best candidate (to the constrained problem), or the best distribution over candidates. For more details, please see our arXiv paper: "https://arxiv.org/abs/1804.06500". 
PiperOrigin-RevId: 193999550 --- tensorflow/contrib/BUILD | 1 + tensorflow/contrib/__init__.py | 1 + tensorflow/contrib/cmake/python_modules.txt | 2 + .../contrib/constrained_optimization/BUILD | 91 +++ .../constrained_optimization/README.md | 345 ++++++++++ .../constrained_optimization/__init__.py | 41 ++ .../python/candidates.py | 319 ++++++++++ .../python/candidates_test.py | 95 +++ .../constrained_minimization_problem.py | 123 ++++ .../python/constrained_optimizer.py | 208 ++++++ .../python/external_regret_optimizer.py | 375 +++++++++++ .../python/external_regret_optimizer_test.py | 136 ++++ .../python/swap_regret_optimizer.py | 595 ++++++++++++++++++ .../python/swap_regret_optimizer_test.py | 212 +++++++ .../python/test_util.py | 58 ++ tensorflow/tools/pip_package/BUILD | 1 + 16 files changed, 2603 insertions(+) create mode 100644 tensorflow/contrib/constrained_optimization/BUILD create mode 100644 tensorflow/contrib/constrained_optimization/README.md create mode 100644 tensorflow/contrib/constrained_optimization/__init__.py create mode 100644 tensorflow/contrib/constrained_optimization/python/candidates.py create mode 100644 tensorflow/contrib/constrained_optimization/python/candidates_test.py create mode 100644 tensorflow/contrib/constrained_optimization/python/constrained_minimization_problem.py create mode 100644 tensorflow/contrib/constrained_optimization/python/constrained_optimizer.py create mode 100644 tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py create mode 100644 tensorflow/contrib/constrained_optimization/python/external_regret_optimizer_test.py create mode 100644 tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py create mode 100644 tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer_test.py create mode 100644 tensorflow/contrib/constrained_optimization/python/test_util.py diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 8edb8654b8..abdbdb4cd2 
100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -31,6 +31,7 @@ py_library( "//tensorflow/contrib/cluster_resolver:cluster_resolver_py", "//tensorflow/contrib/coder:coder_py", "//tensorflow/contrib/compiler:compiler_py", + "//tensorflow/contrib/constrained_optimization", "//tensorflow/contrib/copy_graph:copy_graph_py", "//tensorflow/contrib/crf:crf_py", "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_py", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 0d163daa6e..7f33d460dc 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -29,6 +29,7 @@ from tensorflow.contrib import cloud from tensorflow.contrib import cluster_resolver from tensorflow.contrib import coder from tensorflow.contrib import compiler +from tensorflow.contrib import constrained_optimization from tensorflow.contrib import copy_graph from tensorflow.contrib import crf from tensorflow.contrib import cudnn_rnn diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index 932a6eeeaa..2554b3a6e0 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -147,6 +147,8 @@ tensorflow/contrib/coder/python tensorflow/contrib/coder/python/layers tensorflow/contrib/coder/python/ops tensorflow/contrib/compiler +tensorflow/contrib/constrained_optimization +tensorflow/contrib/constrained_optimization/python tensorflow/contrib/copy_graph tensorflow/contrib/copy_graph/python tensorflow/contrib/copy_graph/python/util diff --git a/tensorflow/contrib/constrained_optimization/BUILD b/tensorflow/contrib/constrained_optimization/BUILD new file mode 100644 index 0000000000..619153df67 --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/BUILD @@ -0,0 +1,91 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "py_test") + +# 
Transitive dependencies of this target will be included in the pip package. +py_library( + name = "constrained_optimization_pip", + deps = [ + ":constrained_optimization", + ":test_util", + ], +) + +py_library( + name = "constrained_optimization", + srcs = [ + "__init__.py", + "python/candidates.py", + "python/constrained_minimization_problem.py", + "python/constrained_optimizer.py", + "python/external_regret_optimizer.py", + "python/swap_regret_optimizer.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework", + "//tensorflow/python:standard_ops", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) + +py_test( + name = "candidates_test", + srcs = ["python/candidates_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":constrained_optimization", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) + +# NOTE: This library can't be "testonly" since it needs to be included in the +# pip package. 
+py_library( + name = "test_util", + srcs = ["python/test_util.py"], + srcs_version = "PY2AND3", + deps = [ + ":constrained_optimization", + "//tensorflow/python:dtypes", + "//tensorflow/python:standard_ops", + ], +) + +py_test( + name = "external_regret_optimizer_test", + srcs = ["python/external_regret_optimizer_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":constrained_optimization", + ":test_util", + "//tensorflow/python:client_testlib", + "//tensorflow/python:standard_ops", + "//tensorflow/python:training", + "//third_party/py/numpy", + ], +) + +py_test( + name = "swap_regret_optimizer_test", + srcs = ["python/swap_regret_optimizer_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":constrained_optimization", + ":test_util", + "//tensorflow/python:client_testlib", + "//tensorflow/python:standard_ops", + "//tensorflow/python:training", + "//third_party/py/numpy", + ], +) diff --git a/tensorflow/contrib/constrained_optimization/README.md b/tensorflow/contrib/constrained_optimization/README.md new file mode 100644 index 0000000000..c65a150464 --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/README.md @@ -0,0 +1,345 @@ + + +# ConstrainedOptimization (TFCO) + +TFCO is a library for optimizing inequality-constrained problems in TensorFlow. +Both the objective function and the constraints are represented as Tensors, +giving users the maximum amount of flexibility in specifying their optimization +problems. + +This flexibility makes optimization considerably more difficult: on a non-convex +problem, if one uses the "standard" approach of introducing a Lagrange +multiplier for each constraint, and then jointly maximizing over the Lagrange +multipliers and minimizing over the model parameters, then a stable stationary +point might not even *exist*. Hence, in some cases, oscillation, instead of +convergence, is inevitable. 
+ +Thankfully, it turns out that even if, over the course of optimization, no +*particular* iterate does a good job of minimizing the objective while +satisfying the constraints, the *sequence* of iterates, on average, usually +will. This observation suggests the following approach: at training time, we'll +periodically snapshot the model state during optimization; then, at evaluation +time, each time we're given a new example to evaluate, we'll sample one of the +saved snapshots uniformly at random, and apply it to the example. This +*stochastic model* will generally perform well, both with respect to the +objective function, and the constraints. + +In fact, we can do better: it's possible to post-process the set of snapshots to +find a distribution over at most $$m+1$$ snapshots, where $$m$$ is the number of +constraints, that will be at least as good (and will usually be much better) +than the (much larger) uniform distribution described above. If you're unable or +unwilling to use a stochastic model at all, then you can instead use a heuristic +to choose the single best snapshot. + +For full details, motivation, and theoretical results on the approach taken by +this library, please refer to: + +> Cotter, Jiang and Sridharan. "Two-Player Games for Efficient Non-Convex +> Constrained Optimization". +> [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500) + +which will be referred to as [CoJiSr18] throughout the remainder of this +document. + +### Proxy Constraints + +Imagine that we want to constrain the recall of a binary classifier to be at +least 90%. Since the recall is proportional to the number of true positive +classifications, which itself is a sum of indicator functions, this constraint +is non-differentiable, and therefore cannot be used in a problem that will be +optimized using a (stochastic) gradient-based algorithm. 
+ +For this and similar problems, TFCO supports so-called *proxy constraints*, +which are (at least semi-differentiable) approximations of the original +constraints. For example, one could create a proxy recall function by replacing +the indicator functions with sigmoids. During optimization, each proxy +constraint function will be penalized, with the magnitude of the penalty being +chosen to satisfy the corresponding *original* (non-proxy) constraint. + +On a problem including proxy constraints—even a convex problem—the +Lagrangian approach discussed above isn't guaranteed to work. However, a +different algorithm, based on minimizing *swap regret*, does work. Aside from +this difference, the recommended procedure for optimizing a proxy-constrained +problem remains the same: periodically snapshot the model during optimization, +and then either find the best $$m+1$$-sized distribution, or heuristically +choose the single best snapshot. + +## Components + +* [constrained_minimization_problem](https://www.tensorflow.org/code/tensorflow/contrib/constrained_optimization/python/constrained_minimization_problem.py): + contains the `ConstrainedMinimizationProblem` interface. Your own + constrained optimization problems should be represented using + implementations of this interface. + +* [constrained_optimizer](https://www.tensorflow.org/code/tensorflow/contrib/constrained_optimization/python/constrained_optimizer.py): + contains the `ConstrainedOptimizer` interface, which is similar to (but + different from) `tf.train.Optimizer`, with the main difference being that + `ConstrainedOptimizer`s are given `ConstrainedMinimizationProblem`s to + optimize, and perform constrained optimization. 
+ + * [external_regret_optimizer](https://www.tensorflow.org/code/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py): + contains the `AdditiveExternalRegretOptimizer` implementation, which is + a `ConstrainedOptimizer` implementing the Lagrangian approach discussed + above (with additive updates to the Lagrange multipliers). You should + use this optimizer for problems *without* proxy constraints. It may also + work for problems with proxy constraints, but we recommend using a swap + regret optimizer, instead. + + This optimizer is most similar to Algorithm 3 in Appendix C.3 of + [CoJiSr18], and is discussed in Section 3. The two differences are that + it uses proxy constraints (if they're provided) in the update of the + model parameters, and uses `tf.train.Optimizer`s, instead of SGD, for + the "inner" updates. + + * [swap_regret_optimizer](https://www.tensorflow.org/code/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py): + contains the `AdditiveSwapRegretOptimizer` and + `MultiplicativeSwapRegretOptimizer` implementations, which are + `ConstrainedOptimizer`s implementing the swap-regret minimization + approach mentioned above (with additive or multiplicative updates, + respectively, to the parameters associated with the + constraints—these parameters are not Lagrange multipliers, but + play a similar role). You should use one of these optimizers (we suggest + `MultiplicativeSwapRegretOptimizer`) for problems *with* proxy + constraints. + + The `MultiplicativeSwapRegretOptimizer` is most similar to Algorithm 2 + in Section 4 of [CoJiSr18], with the difference being that it uses + `tf.train.Optimizer`s, instead of SGD, for the "inner" updates. The + `AdditiveSwapRegretOptimizer` differs further in that it performs + additive (instead of multiplicative) updates of the stochastic matrix. 
+ +* [candidates](https://www.tensorflow.org/code/tensorflow/contrib/constrained_optimization/python/candidates.py): + contains two functions, `find_best_candidate_distribution` and + `find_best_candidate_index`. Both of these functions are given a set of + candidate solutions to a constrained optimization problem, from which the + former finds the best distribution over at most $$m+1$$ candidates, and the + latter heuristically finds the single best candidate. As discussed above, + the set of candidates will typically be model snapshots saved periodically + during optimization. Both of these functions require that scipy be + installed. + + The `find_best_candidate_distribution` function implements the approach + described in Lemma 3 of [CoJiSr18], while `find_best_candidate_index` + implements the heuristic used for hyperparameter search in the experiments + of Section 5.2. + +## Convex Example with Proxy Constraints + +This is a simple example of recall-constrained optimization on simulated data: +we will try to find a classifier that minimizes the average hinge loss while +constraining recall to be at least 90%. + +We'll start with the required imports—notice the definition of `tfco`: + +```python +import math +import numpy as np +import tensorflow as tf + +tfco = tf.contrib.constrained_optimization +``` + +We'll now create an implementation of the `ConstrainedMinimizationProblem` class +for this problem. The constructor takes three parameters: a Tensor containing +the classification labels (0 or 1) for every training example, another Tensor +containing the model's predictions on every training example (sometimes called +the "logits"), and the lower bound on recall that will be enforced using a +constraint. 
+ +This implementation will contain both constraints *and* proxy constraints: the +former represents the constraint that the true recall (defined in terms of the +*number* of true positives) be at least `recall_lower_bound`, while the latter +represents the same constraint, but on a hinge approximation of the recall. + +```python +class ExampleProblem(tfco.ConstrainedMinimizationProblem): + + def __init__(self, labels, predictions, recall_lower_bound): + self._labels = labels + self._predictions = predictions + self._recall_lower_bound = recall_lower_bound + # The number of positively-labeled examples. + self._positive_count = tf.reduce_sum(self._labels) + + @property + def objective(self): + return tf.losses.hinge_loss(labels=self._labels, logits=self._predictions) + + @property + def constraints(self): + true_positives = self._labels * tf.to_float(self._predictions > 0) + true_positive_count = tf.reduce_sum(true_positives) + recall = true_positive_count / self._positive_count + # The constraint is (recall >= self._recall_lower_bound), which we convert + # to (self._recall_lower_bound - recall <= 0) because + # ConstrainedMinimizationProblems must always provide their constraints in + # the form (tensor <= 0). + # + # The result of this function should be a tensor, with each element being + # a quantity that is constrained to be nonpositive. We only have one + # constraint, so we return a one-element tensor. + return self._recall_lower_bound - recall + + @property + def proxy_constraints(self): + # Use 1 - hinge since we're SUBTRACTING recall in the constraint function, + # and we want the proxy constraint function to be convex. + true_positives = self._labels * tf.minimum(1.0, self._predictions) + true_positive_count = tf.reduce_sum(true_positives) + recall = true_positive_count / self._positive_count + # Please see the corresponding comment in the constraints property. 
+ return self._recall_lower_bound - recall +``` + +We'll now create a simple simulated dataset by sampling 1000 random +10-dimensional feature vectors from a Gaussian, finding their labels using a +random "ground truth" linear model, and then adding noise by randomly flipping +200 labels. + +```python +# Create a simulated 10-dimensional training dataset consisting of 1000 labeled +# examples, of which 800 are labeled correctly and 200 are mislabeled. +num_examples = 1000 +num_mislabeled_examples = 200 +dimension = 10 +# We will constrain the recall to be at least 90%. +recall_lower_bound = 0.9 + +# Create random "ground truth" parameters to a linear model. +ground_truth_weights = np.random.normal(size=dimension) / math.sqrt(dimension) +ground_truth_threshold = 0 + +# Generate a random set of features for each example. +features = np.random.normal(size=(num_examples, dimension)).astype( + np.float32) / math.sqrt(dimension) +# Compute the labels from these features given the ground truth linear model. +labels = (np.matmul(features, ground_truth_weights) > + ground_truth_threshold).astype(np.float32) +# Add noise by randomly flipping num_mislabeled_examples labels. +mislabeled_indices = np.random.choice( + num_examples, num_mislabeled_examples, replace=False) +labels[mislabeled_indices] = 1 - labels[mislabeled_indices] +``` + +We're now ready to construct our model, and the corresponding optimization +problem. We'll use a linear model of the form $$f(x) = w^T x - t$$, where $$w$$ +is the `weights`, and $$t$$ is the `threshold`. The `problem` variable will hold +an instance of the `ExampleProblem` class we created earlier. + +```python +# Create variables containing the model parameters. +weights = tf.Variable(tf.zeros(dimension), dtype=tf.float32, name="weights") +threshold = tf.Variable(0.0, dtype=tf.float32, name="threshold") + +# Create the optimization problem. 
+constant_labels = tf.constant(labels, dtype=tf.float32) +constant_features = tf.constant(features, dtype=tf.float32) +predictions = tf.tensordot(constant_features, weights, axes=(1, 0)) - threshold +problem = ExampleProblem( + labels=constant_labels, + predictions=predictions, + recall_lower_bound=recall_lower_bound, +) +``` + +We're almost ready to train our model, but first we'll create a couple of +functions to measure its performance. We're interested in two quantities: the +average hinge loss (which we seek to minimize), and the recall (which we +constrain). + +```python +def average_hinge_loss(labels, predictions): + num_examples, = np.shape(labels) + signed_labels = (labels * 2) - 1 + total_hinge_loss = np.sum(np.maximum(0.0, 1.0 - signed_labels * predictions)) + return total_hinge_loss / num_examples + +def recall(labels, predictions): + positive_count = np.sum(labels) + true_positives = labels * (predictions > 0) + true_positive_count = np.sum(true_positives) + return true_positive_count / positive_count +``` + +As was mentioned earlier, external regret optimizers suffice for problems +without proxy constraints, but swap regret optimizers are recommended for +problems *with* proxy constraints. Since this problem contains proxy +constraints, we use the `MultiplicativeSwapRegretOptimizer`. + +For this problem, the constraint is fairly easy to satisfy, so we can use the +same "inner" optimizer (an `AdagradOptimizer` with a learning rate of 1) for +optimization of both the model parameters (`weights` and `threshold`), and the +internal parameters associated with the constraints (these are the analogues of +the Lagrange multipliers used by the `MultiplicativeSwapRegretOptimizer`). 
For +more difficult problems, it will often be necessary to use different optimizers, +with different learning rates (presumably found via a hyperparameter search): to +accomplish this, pass *both* the `optimizer` and `constraint_optimizer` +parameters to `MultiplicativeSwapRegretOptimizer`'s constructor. + +Since this is a convex problem (both the objective and proxy constraint +functions are convex), we can just take the last iterate. Periodic snapshotting, +and the use of the `find_best_candidate_distribution` or +`find_best_candidate_index` functions, is generally only necessary for +non-convex problems (and even then, it isn't *always* necessary). + +```python +with tf.Session() as session: + optimizer = tfco.MultiplicativeSwapRegretOptimizer( + optimizer=tf.train.AdagradOptimizer(learning_rate=1.0)) + train_op = optimizer.minimize(problem) + + session.run(tf.global_variables_initializer()) + for ii in xrange(1000): + session.run(train_op) + + trained_weights, trained_threshold = session.run((weights, threshold)) + +trained_predictions = np.matmul(features, trained_weights) - trained_threshold +print("Constrained average hinge loss = %f" % average_hinge_loss( + labels, trained_predictions)) +print("Constrained recall = %f" % recall(labels, trained_predictions)) +``` + +Running the above code gives the following output (due to the randomness of the +dataset, you'll get a different result when you run it): + +```none +Constrained average hinge loss = 0.710019 +Constrained recall = 0.899811 +``` + +As we hoped, the recall is extremely close to 90%—and, thanks to the use +of proxy constraints, this is the *true* recall, not a hinge approximation. + +For comparison, let's try optimizing the same problem *without* the recall +constraint: + +```python +with tf.Session() as session: + optimizer = tf.train.AdagradOptimizer(learning_rate=1.0) + # For optimizing the unconstrained problem, we just minimize the "objective" + # portion of the minimization problem. 
+ train_op = optimizer.minimize(problem.objective) + + session.run(tf.global_variables_initializer()) + for ii in xrange(1000): + session.run(train_op) + + trained_weights, trained_threshold = session.run((weights, threshold)) + +trained_predictions = np.matmul(features, trained_weights) - trained_threshold +print("Unconstrained average hinge loss = %f" % average_hinge_loss( + labels, trained_predictions)) +print("Unconstrained recall = %f" % recall(labels, trained_predictions)) +``` + +This code gives the following output (again, you'll get a different answer, +since the dataset is random): + +```none +Unconstrained average hinge loss = 0.627271 +Unconstrained recall = 0.793951 +``` + +Because there is no constraint, the unconstrained problem does a better job of +minimizing the average hinge loss, but naturally doesn't approach 90% recall. diff --git a/tensorflow/contrib/constrained_optimization/__init__.py b/tensorflow/contrib/constrained_optimization/__init__.py new file mode 100644 index 0000000000..1e49ba9f17 --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/__init__.py @@ -0,0 +1,41 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""A library for performing constrained optimization in TensorFlow.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint: disable=wildcard-import +from tensorflow.contrib.constrained_optimization.python.candidates import * +from tensorflow.contrib.constrained_optimization.python.constrained_minimization_problem import * +from tensorflow.contrib.constrained_optimization.python.constrained_optimizer import * +from tensorflow.contrib.constrained_optimization.python.external_regret_optimizer import * +from tensorflow.contrib.constrained_optimization.python.swap_regret_optimizer import * +# pylint: enable=wildcard-import + +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + "AdditiveExternalRegretOptimizer", + "AdditiveSwapRegretOptimizer", + "ConstrainedMinimizationProblem", + "ConstrainedOptimizer", + "find_best_candidate_distribution", + "find_best_candidate_index", + "MultiplicativeSwapRegretOptimizer", +] + +remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/constrained_optimization/python/candidates.py b/tensorflow/contrib/constrained_optimization/python/candidates.py new file mode 100644 index 0000000000..ac86a6741b --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/python/candidates.py @@ -0,0 +1,319 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Code for optimizing over a set of candidate solutions. + +The functions in this file deal with the constrained problem: + +> minimize f(w) +> s.t. g_i(w) <= 0 for all i in {0,1,...,m-1} + +Here, f(w) is the "objective function", and g_i(w) is the ith (of m) "constraint +function". Given the values of the objective and constraint functions for a set +of n "candidate solutions" {w_0,w_1,...,w_{n-1}} (for a total of n objective +function values, and n*m constraint function values), the +`find_best_candidate_distribution` function finds the best DISTRIBUTION over +these candidates, while `find_best_candidate_index` heuristically finds the +single best candidate. + +Both of these functions have dependencies on `scipy`, so if you want to call +them, then you must make sure that `scipy` is available. The imports are +performed inside the functions themselves, so if they're not actually called, +then `scipy` is not needed. + +For more specifics, please refer to: + +> Cotter, Jiang and Sridharan. "Two-Player Games for Efficient Non-Convex +> Constrained Optimization". +> [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500) + +The `find_best_candidate_distribution` function implements the approach +described in Lemma 3, while `find_best_candidate_index` implements the heuristic +used for hyperparameter search in the experiments of Section 5.2. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin + + +def _find_best_candidate_distribution_helper(objective_vector, + constraints_matrix, + maximum_violation=0.0): + """Finds a distribution minimizing an objective subject to constraints. 
+ + This function deals with the constrained problem: + + > minimize f(w) + > s.t. g_i(w) <= 0 for all i in {0,1,...,m-1} + + Here, f(w) is the "objective function", and g_i(w) is the ith (of m) + "constraint function". Given a set of n "candidate solutions" + {w_0,w_1,...,w_{n-1}}, this function finds a distribution over these n + candidates that, in expectation, minimizes the objective while violating + the constraints by no more than `maximum_violation`. If no such distribution + exists, it returns an error (using Go-style error reporting). + + The `objective_vector` parameter should be a numpy array with shape (n,), for + which objective_vector[i] = f(w_i). Likewise, `constraints_matrix` should be a + numpy array with shape (m,n), for which constraints_matrix[i,j] = g_i(w_j). + + This function will return a distribution for which at most m+1 probabilities, + and often fewer, are nonzero. + + Args: + objective_vector: numpy array of shape (n,), where n is the number of + "candidate solutions". Contains the objective function values. + constraints_matrix: numpy array of shape (m,n), where m is the number of + constraints and n is the number of "candidate solutions". Contains the + constraint violation magnitudes. + maximum_violation: nonnegative float, the maximum amount by which any + constraint may be violated, in expectation. + + Returns: + A pair (`result`, `message`), exactly one of which is None. If `message` is + None, then the `result` contains the optimal distribution as a numpy array + of shape (n,). If `result` is None, then `message` contains an error + message. + + Raises: + ValueError: If `objective_vector` and `constraints_matrix` have inconsistent + shapes, or if `maximum_violation` is negative. + ImportError: If we're unable to import `scipy.optimize`. 
+ """ + if maximum_violation < 0.0: + raise ValueError("maximum_violation must be nonnegative") + + mm, nn = np.shape(constraints_matrix) + if (nn,) != np.shape(objective_vector): + raise ValueError( + "objective_vector must have shape (n,), and constraints_matrix (m, n)," + " where n is the number of candidates, and m is the number of " + "constraints") + + # We import scipy inline, instead of at the top of the file, so that a scipy + # dependency is only introduced if either find_best_candidate_distribution() + # or find_best_candidate_index() are actually called. + import scipy.optimize # pylint: disable=g-import-not-at-top + + # Feasibility (within maximum_violation) constraints. + a_ub = constraints_matrix + b_ub = np.full((mm, 1), maximum_violation) + # Sum-to-one constraint. + a_eq = np.ones((1, nn)) + b_eq = np.ones((1, 1)) + # Nonnegativity constraints. + bounds = (0, None) + + result = scipy.optimize.linprog( + objective_vector, + A_ub=a_ub, + b_ub=b_ub, + A_eq=a_eq, + b_eq=b_eq, + bounds=bounds) + # Go-style error reporting. We don't raise on error, since + # find_best_candidate_distribution() needs to handle the failure case, and we + # shouldn't use exceptions as flow-control. + if not result.success: + return (None, result.message) + else: + return (result.x, None) + + +def find_best_candidate_distribution(objective_vector, + constraints_matrix, + epsilon=0.0): + """Finds a distribution minimizing an objective subject to constraints. + + This function deals with the constrained problem: + + > minimize f(w) + > s.t. g_i(w) <= 0 for all i in {0,1,...,m-1} + + Here, f(w) is the "objective function", and g_i(w) is the ith (of m) + "constraint function". Given a set of n "candidate solutions" + {w_0,w_1,...,w_{n-1}}, this function finds a distribution over these n + candidates that, in expectation, minimizes the objective while violating + the constraints by the smallest possible amount (with the amount being found + via bisection search). 
+ + The `objective_vector` parameter should be a numpy array with shape (n,), for + which objective_vector[i] = f(w_i). Likewise, `constraints_matrix` should be a + numpy array with shape (m,n), for which constraints_matrix[i,j] = g_i(w_j). + + This function will return a distribution for which at most m+1 probabilities, + and often fewer, are nonzero. + + For more specifics, please refer to: + + > Cotter, Jiang and Sridharan. "Two-Player Games for Efficient Non-Convex + > Constrained Optimization". + > [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500) + + This function implements the approach described in Lemma 3. + + Args: + objective_vector: numpy array of shape (n,), where n is the number of + "candidate solutions". Contains the objective function values. + constraints_matrix: numpy array of shape (m,n), where m is the number of + constraints and n is the number of "candidate solutions". Contains the + constraint violation magnitudes. + epsilon: nonnegative float, the threshold at which to terminate the binary + search while searching for the minimal expected constraint violation + magnitude. + + Returns: + The optimal distribution, as a numpy array of shape (n,). + + Raises: + ValueError: If `objective_vector` and `constraints_matrix` have inconsistent + shapes, or if `epsilon` is negative. + ImportError: If we're unable to import `scipy.optimize`. + """ + if epsilon < 0.0: + raise ValueError("epsilon must be nonnegative") + + # If there is a feasible solution (i.e. with maximum_violation=0), then that's + # what we'll return. + pp, _ = _find_best_candidate_distribution_helper(objective_vector, + constraints_matrix) + if pp is not None: + return pp + + # The bound is the minimum over all candidates, of the maximum per-candidate + # constraint violation. 
+ lower = 0.0 + upper = np.min(np.amax(constraints_matrix, axis=0)) + best_pp, _ = _find_best_candidate_distribution_helper( + objective_vector, constraints_matrix, maximum_violation=upper) + assert best_pp is not None + + # Throughout this loop, a maximum_violation of "lower" is not achievable, + # but a maximum_violation of "upper" is achievable. + while True: + middle = 0.5 * (lower + upper) + if (middle - lower <= epsilon) or (upper - middle <= epsilon): + break + else: + pp, _ = _find_best_candidate_distribution_helper( + objective_vector, constraints_matrix, maximum_violation=middle) + if pp is None: + lower = middle + else: + best_pp = pp + upper = middle + + return best_pp + + +def find_best_candidate_index(objective_vector, + constraints_matrix, + rank_objectives=False): + """Heuristically finds the best candidate solution to a constrained problem. + + This function deals with the constrained problem: + + > minimize f(w) + > s.t. g_i(w) <= 0 for all i in {0,1,...,m-1} + + Here, f(w) is the "objective function", and g_i(w) is the ith (of m) + "constraint function". Given a set of n "candidate solutions" + {w_0,w_1,...,w_{n-1}}, this function finds the "best" solution according + to the following heuristic: + + 1. Across all models, the ith constraint violations (i.e. max{0, g_i(w_j)}) + are ranked, as are the objectives (if rank_objectives=True). + 2. Each model is then associated with its MAXIMUM rank across all m constraints + (and the objective, if rank_objectives=True). + 3. The model with the minimal maximum rank is then identified. Ties are + broken using the objective function value. + 4. The index of this "best" model is returned. + + The `objective_vector` parameter should be a numpy array with shape (n,), for + which objective_vector[i] = f(w_i). Likewise, `constraints_matrix` should be a + numpy array with shape (m,n), for which constraints_matrix[i,j] = g_i(w_j). + + For more specifics, please refer to: + + > Cotter, Jiang and Sridharan. 
"Two-Player Games for Efficient Non-Convex + > Constrained Optimization". + > [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500) + + This function implements the heuristic used for hyperparameter search in the + experiments of Section 5.2. + + Args: + objective_vector: numpy array of shape (n,), where n is the number of + "candidate solutions". Contains the objective function values. + constraints_matrix: numpy array of shape (m,n), where m is the number of + constraints and n is the number of "candidate solutions". Contains the + constraint violation magnitudes. + rank_objectives: bool, whether the objective function values should be + included in the initial ranking step. If True, both the objective and + constraints will be ranked. If False, only the constraints will be ranked. + In either case, the objective function values will be used for + tiebreaking. + + Returns: + The index (in {0,1,...,n-1}) of the "best" model according to the above + heuristic. + + Raises: + ValueError: If `objective_vector` and `constraints_matrix` have inconsistent + shapes. + ImportError: If we're unable to import `scipy.stats`. + """ + mm, nn = np.shape(constraints_matrix) + if (nn,) != np.shape(objective_vector): + raise ValueError( + "objective_vector must have shape (n,), and constraints_matrix (m, n)," + " where n is the number of candidates, and m is the number of " + "constraints") + + # We import scipy inline, instead of at the top of the file, so that a scipy + # dependency is only introduced if either find_best_candidate_distribution() + # or find_best_candidate_index() are actually called. + import scipy.stats # pylint: disable=g-import-not-at-top + + if rank_objectives: + maximum_ranks = scipy.stats.rankdata(objective_vector, method="min") + else: + maximum_ranks = np.zeros(nn, dtype=np.int64) + for ii in xrange(mm): + # Take the maximum of the constraint functions with zero, since we want to + # rank the magnitude of constraint *violations*. 
If the constraint is + # satisfied, then we don't care how much it's satisfied by (as a result, we + # expect all models satisfying a constraint to be tied at rank 1). + ranks = scipy.stats.rankdata( + np.maximum(0.0, constraints_matrix[ii, :]), method="min") + maximum_ranks = np.maximum(maximum_ranks, ranks) + + best_index = None + best_rank = float("Inf") + best_objective = float("Inf") + for ii in xrange(nn): + if maximum_ranks[ii] < best_rank: + best_index = ii + best_rank = maximum_ranks[ii] + best_objective = objective_vector[ii] + elif (maximum_ranks[ii] == best_rank) and (objective_vector[ii] <= + best_objective): + best_index = ii + best_objective = objective_vector[ii] + + return best_index diff --git a/tensorflow/contrib/constrained_optimization/python/candidates_test.py b/tensorflow/contrib/constrained_optimization/python/candidates_test.py new file mode 100644 index 0000000000..a4c49d48bc --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/python/candidates_test.py @@ -0,0 +1,95 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== +"""Tests for constrained_optimization.python.candidates.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.constrained_optimization.python import candidates +from tensorflow.python.platform import test + + +class CandidatesTest(test.TestCase): + + def test_inconsistent_shapes_for_best_distribution(self): + """An error is raised when parameters have inconsistent shapes.""" + objective_vector = np.array([1, 2, 3]) + constraints_matrix = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) + with self.assertRaises(ValueError): + _ = candidates.find_best_candidate_distribution(objective_vector, + constraints_matrix) + + def test_inconsistent_shapes_for_best_index(self): + """An error is raised when parameters have inconsistent shapes.""" + objective_vector = np.array([1, 2, 3]) + constraints_matrix = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) + with self.assertRaises(ValueError): + _ = candidates.find_best_candidate_index(objective_vector, + constraints_matrix) + + def test_best_distribution(self): + """Distribution should match known solution.""" + objective_vector = np.array( + [0.03053309, -0.06667082, 0.88355145, 0.46529806]) + constraints_matrix = np.array( + [[-0.60164551, 0.36676229, 0.7856454, -0.8441711], + [0.00371592, -0.16392108, -0.59778071, -0.56908492]]) + distribution = candidates.find_best_candidate_distribution( + objective_vector, constraints_matrix) + # Verify that the solution is a probability distribution. + self.assertTrue(np.all(distribution >= 0)) + self.assertAlmostEqual(np.sum(distribution), 1.0) + # Verify that the solution satisfies the constraints. + maximum_constraint_violation = np.amax( + np.dot(constraints_matrix, distribution)) + self.assertLessEqual(maximum_constraint_violation, 0) + # Verify that the solution matches that which we expect. 
+ expected_distribution = np.array([0.37872711, 0.62127289, 0, 0]) + self.assertAllClose(expected_distribution, distribution, rtol=0, atol=1e-6) + + def test_best_index_rank_objectives_true(self): + """Index should match known solution.""" + # Objective ranks = [2, 1, 4, 3]. + objective_vector = np.array( + [0.03053309, -0.06667082, 0.88355145, 0.46529806]) + # Constraint ranks = [[1, 3, 4, 1], [4, 1, 1, 1]]. + constraints_matrix = np.array( + [[-0.60164551, 0.36676229, 0.7856454, -0.8441711], + [0.00371592, -0.16392108, -0.59778071, -0.56908492]]) + # Maximum ranks = [4, 3, 4, 3]. + index = candidates.find_best_candidate_index( + objective_vector, constraints_matrix, rank_objectives=True) + self.assertEqual(1, index) + + def test_best_index_rank_objectives_false(self): + """Index should match known solution.""" + # Objective ranks = [2, 1, 4, 3]. + objective_vector = np.array( + [0.03053309, -0.06667082, 0.88355145, 0.46529806]) + # Constraint ranks = [[1, 3, 4, 1], [4, 1, 1, 1]]. + constraints_matrix = np.array( + [[-0.60164551, 0.36676229, 0.7856454, -0.8441711], + [0.00371592, -0.16392108, -0.59778071, -0.56908492]]) + # Maximum ranks = [4, 3, 4, 1]. + index = candidates.find_best_candidate_index( + objective_vector, constraints_matrix, rank_objectives=False) + self.assertEqual(3, index) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/constrained_optimization/python/constrained_minimization_problem.py b/tensorflow/contrib/constrained_optimization/python/constrained_minimization_problem.py new file mode 100644 index 0000000000..70813fb217 --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/python/constrained_minimization_problem.py @@ -0,0 +1,123 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Defines abstract class for `ConstrainedMinimizationProblem`s. + +A ConstrainedMinimizationProblem consists of an objective function to minimize, +and a set of constraint functions that are constrained to be nonpositive. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc + +import six + + +@six.add_metaclass(abc.ABCMeta) +class ConstrainedMinimizationProblem(object): + """Abstract class representing a `ConstrainedMinimizationProblem`. + + A ConstrainedMinimizationProblem consists of an objective function to + minimize, and a set of constraint functions that are constrained to be + nonpositive. + + In addition to the constraint functions, there may (optionally) be proxy + constraint functions: a ConstrainedOptimizer will attempt to penalize these + proxy constraint functions so as to satisfy the (non-proxy) constraints. Proxy + constraints could be used if the constraints functions are difficult or + impossible to optimize (e.g. if they're piecewise constant), in which case the + proxy constraints should be some approximation of the original constraints + that is well-enough behaved to permit successful optimization. + """ + + @abc.abstractproperty + def objective(self): + """Returns the objective function. + + Returns: + A 0d tensor that should be minimized. + """ + pass + + @property + def num_constraints(self): + """Returns the number of constraints. 
+ + Returns: + An int containing the number of constraints. + + Raises: + ValueError: If the constraints (or proxy_constraints, if present) do not + have fully-known shapes, OR if proxy_constraints are present, and the + shapes of constraints and proxy_constraints are fully-known, but they're + different. + """ + constraints_shape = self.constraints.get_shape() + if self.proxy_constraints is None: + proxy_constraints_shape = constraints_shape + else: + proxy_constraints_shape = self.proxy_constraints.get_shape() + + if (constraints_shape is None or proxy_constraints_shape is None or + any([ii is None for ii in constraints_shape.as_list()]) or + any([ii is None for ii in proxy_constraints_shape.as_list()])): + raise ValueError( + "constraints and proxy_constraints must have fully-known shapes") + if constraints_shape != proxy_constraints_shape: + raise ValueError( + "constraints and proxy_constraints must have the same shape") + + size = 1 + for ii in constraints_shape.as_list(): + size *= ii + return int(size) + + @abc.abstractproperty + def constraints(self): + """Returns the vector of constraint functions. + + Letting g_i be the ith element of the constraints vector, the ith constraint + will be g_i <= 0. + + Returns: + A tensor of constraint functions. + """ + pass + + # This is a property, instead of an abstract property, since it doesn't need + # to be overridden: if proxy_constraints returns None, then there are no + # proxy constraints. + @property + def proxy_constraints(self): + """Returns the optional vector of proxy constraint functions. + + The difference between `constraints` and `proxy_constraints` is that, when + proxy constraints are present, the `constraints` are merely EVALUATED during + optimization, whereas the `proxy_constraints` are DIFFERENTIATED. If there + are no proxy constraints, then the `constraints` are both evaluated and + differentiated. 
+ + For example, if we want to impose constraints on step functions, then we + could use these functions for `constraints`. However, because a step + function has zero gradient almost everywhere, we can't differentiate these + functions, so we would take `proxy_constraints` to be some differentiable + approximation of `constraints`. + + Returns: + A tensor of proxy constraint functions. + """ + return None diff --git a/tensorflow/contrib/constrained_optimization/python/constrained_optimizer.py b/tensorflow/contrib/constrained_optimization/python/constrained_optimizer.py new file mode 100644 index 0000000000..8055545366 --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/python/constrained_optimizer.py @@ -0,0 +1,208 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Defines base class for `ConstrainedOptimizer`s.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc + +import six + +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import standard_ops +from tensorflow.python.training import optimizer as train_optimizer + + +@six.add_metaclass(abc.ABCMeta) +class ConstrainedOptimizer(object): + """Base class representing a constrained optimizer. 
+ + A ConstrainedOptimizer wraps a tf.train.Optimizer (or more than one), and + applies it to a ConstrainedMinimizationProblem. Unlike a tf.train.Optimizer, + which takes a tensor to minimize as a parameter to its minimize() method, a + constrained optimizer instead takes a ConstrainedMinimizationProblem. + """ + + def __init__(self, optimizer): + """Constructs a new `ConstrainedOptimizer`. + + Args: + optimizer: tf.train.Optimizer, used to optimize the + ConstrainedMinimizationProblem. + + Returns: + A new `ConstrainedOptimizer`. + """ + self._optimizer = optimizer + + @property + def optimizer(self): + """Returns the `tf.train.Optimizer` used for optimization.""" + return self._optimizer + + def minimize_unconstrained(self, + minimization_problem, + global_step=None, + var_list=None, + gate_gradients=train_optimizer.Optimizer.GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + name=None, + grad_loss=None): + """Returns an `Op` for minimizing the unconstrained problem. + + Unlike `minimize_constrained`, this function ignores the `constraints` (and + `proxy_constraints`) portion of the minimization problem entirely, and only + minimizes `objective`. + + Args: + minimization_problem: ConstrainedMinimizationProblem, the problem to + optimize. + global_step: as in `tf.train.Optimizer`'s `minimize` method. + var_list: as in `tf.train.Optimizer`'s `minimize` method. + gate_gradients: as in `tf.train.Optimizer`'s `minimize` method. + aggregation_method: as in `tf.train.Optimizer`'s `minimize` method. + colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize` + method. + name: as in `tf.train.Optimizer`'s `minimize` method. + grad_loss: as in `tf.train.Optimizer`'s `minimize` method. + + Returns: + TensorFlow Op.
+ """ + return self.optimizer.minimize( + minimization_problem.objective, + global_step=global_step, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + name=name, + grad_loss=grad_loss) + + @abc.abstractmethod + def minimize_constrained(self, + minimization_problem, + global_step=None, + var_list=None, + gate_gradients=train_optimizer.Optimizer.GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + name=None, + grad_loss=None): + """Returns an `Op` for minimizing the constrained problem. + + Unlike `minimize_unconstrained`, this function attempts to find a solution + that minimizes the `objective` portion of the minimization problem while + satisfying the `constraints` portion. + + Args: + minimization_problem: ConstrainedMinimizationProblem, the problem to + optimize. + global_step: as in `tf.train.Optimizer`'s `minimize` method. + var_list: as in `tf.train.Optimizer`'s `minimize` method. + gate_gradients: as in `tf.train.Optimizer`'s `minimize` method. + aggregation_method: as in `tf.train.Optimizer`'s `minimize` method. + colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize` + method. + name: as in `tf.train.Optimizer`'s `minimize` method. + grad_loss: as in `tf.train.Optimizer`'s `minimize` method. + + Returns: + TensorFlow Op. + """ + pass + + def minimize(self, + minimization_problem, + unconstrained_steps=None, + global_step=None, + var_list=None, + gate_gradients=train_optimizer.Optimizer.GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + name=None, + grad_loss=None): + """Returns an `Op` for minimizing the constrained problem. + + This method combines the functionality of `minimize_unconstrained` and + `minimize_constrained`. If global_step < unconstrained_steps, it will + perform an unconstrained update, and if global_step >= unconstrained_steps, + it will perform a constrained update. 
+ + The reason for this functionality is that it may be best to initialize the + constrained optimizer with an approximate optimum of the unconstrained + problem. + + Args: + minimization_problem: ConstrainedMinimizationProblem, the problem to + optimize. + unconstrained_steps: int, number of steps for which we should perform + unconstrained updates, before transitioning to constrained updates. + global_step: as in `tf.train.Optimizer`'s `minimize` method. + var_list: as in `tf.train.Optimizer`'s `minimize` method. + gate_gradients: as in `tf.train.Optimizer`'s `minimize` method. + aggregation_method: as in `tf.train.Optimizer`'s `minimize` method. + colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize` + method. + name: as in `tf.train.Optimizer`'s `minimize` method. + grad_loss: as in `tf.train.Optimizer`'s `minimize` method. + + Returns: + TensorFlow Op. + + Raises: + ValueError: If unconstrained_steps is provided, but global_step is not. + """ + + def unconstrained_fn(): + """Returns an `Op` for minimizing the unconstrained problem.""" + return self.minimize_unconstrained( + minimization_problem=minimization_problem, + global_step=global_step, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + name=name, + grad_loss=grad_loss) + + def constrained_fn(): + """Returns an `Op` for minimizing the constrained problem.""" + return self.minimize_constrained( + minimization_problem=minimization_problem, + global_step=global_step, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + name=name, + grad_loss=grad_loss) + + if unconstrained_steps is not None: + if global_step is None: + raise ValueError( + "global_step cannot be None if unconstrained_steps is provided") + unconstrained_steps_tensor = ops.convert_to_tensor(unconstrained_steps) + dtype = 
unconstrained_steps_tensor.dtype + return control_flow_ops.cond( + standard_ops.cast(global_step, dtype) < unconstrained_steps_tensor, + true_fn=unconstrained_fn, + false_fn=constrained_fn) + else: + return constrained_fn() diff --git a/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py new file mode 100644 index 0000000000..01c6e4f08a --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py @@ -0,0 +1,375 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Defines `AdditiveExternalRegretOptimizer`. + +This optimizer minimizes a `ConstrainedMinimizationProblem` by introducing +Lagrange multipliers, and using `tf.train.Optimizer`s to jointly optimize over +the model parameters and Lagrange multipliers. + +For the purposes of constrained optimization, at least in theory, +external-regret minimization suffices if the `ConstrainedMinimizationProblem` +we're optimizing doesn't have any `proxy_constraints`, while swap-regret +minimization should be used if `proxy_constraints` are present. + +For more specifics, please refer to: + +> Cotter, Jiang and Sridharan. "Two-Player Games for Efficient Non-Convex +> Constrained Optimization". 
+> [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500) + +The formulation used by the AdditiveExternalRegretOptimizer--which is simply the +usual Lagrangian formulation--can be found in Definition 1, and is discussed in +Section 3. This optimizer is most similar to Algorithm 3 in Appendix C.3, with +the two differences being that it uses proxy constraints (if they're provided) +in the update of the model parameters, and uses `tf.train.Optimizer`s, instead +of SGD, for the "inner" updates. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc + +import six + +from tensorflow.contrib.constrained_optimization.python import constrained_optimizer + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import standard_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import optimizer as train_optimizer + + +def _project_multipliers_wrt_euclidean_norm(multipliers, radius): + """Projects its argument onto the feasible region. + + The feasible region is the set of all vectors with nonnegative elements that + sum to at most `radius`. + + Args: + multipliers: 1d tensor, the Lagrange multipliers to project. + radius: float, the radius of the feasible region. + + Returns: + The 1d tensor that results from projecting `multipliers` onto the feasible + region w.r.t. the Euclidean norm. + + Raises: + ValueError: if the `multipliers` tensor does not have a fully-known shape, + or is not one-dimensional. 
+ """ + multipliers_shape = multipliers.get_shape() + if multipliers_shape is None: + raise ValueError("multipliers must have known shape") + if multipliers_shape.ndims != 1: + raise ValueError( + "multipliers must be one dimensional (instead is %d-dimensional)" % + multipliers_shape.ndims) + dimension = multipliers_shape[0].value + if dimension is None: + raise ValueError("multipliers must have fully-known shape") + + def while_loop_condition(iteration, multipliers, inactive, old_inactive): + """Returns false if the while loop should terminate.""" + del multipliers # Needed by the body, but not the condition. + not_done = (iteration < dimension) + not_converged = standard_ops.reduce_any( + standard_ops.not_equal(inactive, old_inactive)) + return standard_ops.logical_and(not_done, not_converged) + + def while_loop_body(iteration, multipliers, inactive, old_inactive): + """Performs one iteration of the projection.""" + del old_inactive # Needed by the condition, but not the body. + iteration += 1 + scale = standard_ops.minimum( + 0.0, + (radius - standard_ops.reduce_sum(multipliers)) / standard_ops.maximum( + 1.0, standard_ops.reduce_sum(inactive))) + multipliers += scale * inactive + new_inactive = standard_ops.to_float(multipliers > 0) + multipliers *= new_inactive + return (iteration, multipliers, new_inactive, inactive) + + iteration = standard_ops.constant(0) + inactive = standard_ops.ones_like(multipliers) + + # We actually want a do-while loop, so we explicitly call while_loop_body() + # once before tf.while_loop(). 
+ iteration, multipliers, inactive, old_inactive = while_loop_body( + iteration, multipliers, inactive, inactive) + iteration, multipliers, inactive, old_inactive = control_flow_ops.while_loop( + while_loop_condition, + while_loop_body, + loop_vars=(iteration, multipliers, inactive, old_inactive), + name="euclidean_projection") + + return multipliers + + +@six.add_metaclass(abc.ABCMeta) +class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer): + """Base class representing an `_ExternalRegretOptimizer`. + + This class contains most of the logic for performing constrained + optimization, minimizing external regret for the constraints player. What it + *doesn't* do is keep track of the internal state (the Lagrange multipliers). + Instead, the state is accessed via the _initial_state(), + _lagrange_multipliers(), _constraint_grad_and_var() and _projection_op() + methods. + + The reason for this is that we want to make it easy to implement different + representations of the internal state. + + For more specifics, please refer to: + + > Cotter, Jiang and Sridharan. "Two-Player Games for Efficient Non-Convex + > Constrained Optimization". + > [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500) + + The formulation used by `_ExternalRegretOptimizer`s--which is simply the usual + Lagrangian formulation--can be found in Definition 1, and is discussed in + Section 3. Such optimizers are most similar to Algorithm 3 in Appendix C.3. + """ + + def __init__(self, optimizer, constraint_optimizer=None): + """Constructs a new `_ExternalRegretOptimizer`. + + The difference between `optimizer` and `constraint_optimizer` (if the latter + is provided) is that the former is used for learning the model parameters, + while the latter is used for the Lagrange multipliers. If no + `constraint_optimizer` is provided, then `optimizer` is used for both.
+ + Args: + optimizer: tf.train.Optimizer, used to optimize the objective and + proxy_constraints portion of the ConstrainedMinimizationProblem. If + constraint_optimizer is not provided, this will also be used to optimize + the Lagrange multipliers. + constraint_optimizer: optional tf.train.Optimizer, used to optimize the + Lagrange multipliers. + + Returns: + A new `_ExternalRegretOptimizer`. + """ + super(_ExternalRegretOptimizer, self).__init__(optimizer=optimizer) + self._constraint_optimizer = constraint_optimizer + + @property + def constraint_optimizer(self): + """Returns the `tf.train.Optimizer` used for the Lagrange multipliers.""" + return self._constraint_optimizer + + @abc.abstractmethod + def _initial_state(self, num_constraints): + pass + + @abc.abstractmethod + def _lagrange_multipliers(self, state): + pass + + @abc.abstractmethod + def _constraint_grad_and_var(self, state, gradient): + pass + + @abc.abstractmethod + def _projection_op(self, state, name=None): + pass + + def minimize_constrained(self, + minimization_problem, + global_step=None, + var_list=None, + gate_gradients=train_optimizer.Optimizer.GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + name=None, + grad_loss=None): + """Returns an `Op` for minimizing the constrained problem. + + The `optimizer` constructor parameter will be used to update the model + parameters, while the Lagrange multipliers will be updated using + `constraint_optimizer` (if provided) or `optimizer` (if not). + + Args: + minimization_problem: ConstrainedMinimizationProblem, the problem to + optimize. + global_step: as in `tf.train.Optimizer`'s `minimize` method. + var_list: as in `tf.train.Optimizer`'s `minimize` method. + gate_gradients: as in `tf.train.Optimizer`'s `minimize` method. + aggregation_method: as in `tf.train.Optimizer`'s `minimize` method. + colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize` + method. + name: as in `tf.train.Optimizer`'s `minimize` method.
+ grad_loss: as in `tf.train.Optimizer`'s `minimize` method. + + Returns: + TensorFlow Op. + """ + objective = minimization_problem.objective + + constraints = minimization_problem.constraints + proxy_constraints = minimization_problem.proxy_constraints + if proxy_constraints is None: + proxy_constraints = constraints + # Flatten both constraints tensors to 1d. + num_constraints = minimization_problem.num_constraints + constraints = standard_ops.reshape(constraints, shape=(num_constraints,)) + proxy_constraints = standard_ops.reshape( + proxy_constraints, shape=(num_constraints,)) + + # We use a lambda to initialize the state so that, if this function call is + # inside the scope of a tf.control_dependencies() block, the dependencies + # will not be applied to the initializer. + state = standard_ops.Variable( + lambda: self._initial_state(num_constraints), + trainable=False, + name="external_regret_optimizer_state") + + multipliers = self._lagrange_multipliers(state) + loss = ( + objective + standard_ops.tensordot(multipliers, proxy_constraints, 1)) + multipliers_gradient = constraints + + update_ops = [] + if self.constraint_optimizer is None: + # If we don't have a separate constraint_optimizer, then we use + # self._optimizer for both the update of the model parameters, and that of + # the internal state. + grads_and_vars = self.optimizer.compute_gradients( + loss, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + grad_loss=grad_loss) + grads_and_vars.append( + self._constraint_grad_and_var(state, multipliers_gradient)) + update_ops.append( + self.optimizer.apply_gradients(grads_and_vars, name="update")) + else: + # If we have a separate constraint_optimizer, then we use self._optimizer + # for the update of the model parameters, and self._constraint_optimizer + # for that of the internal state. 
+ grads_and_vars = self.optimizer.compute_gradients( + loss, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + grad_loss=grad_loss) + multiplier_grads_and_vars = [ + self._constraint_grad_and_var(state, multipliers_gradient) + ] + + gradients = [ + gradient for gradient, _ in grads_and_vars + multiplier_grads_and_vars + if gradient is not None + ] + with ops.control_dependencies(gradients): + update_ops.append( + self.optimizer.apply_gradients(grads_and_vars, name="update")) + update_ops.append( + self.constraint_optimizer.apply_gradients( + multiplier_grads_and_vars, name="optimizer_state_update")) + + with ops.control_dependencies(update_ops): + if global_step is None: + # If we don't have a global step, just project, and we're done. + return self._projection_op(state, name=name) + else: + # If we have a global step, then we need to increment it in addition to + # projecting. + projection_op = self._projection_op(state, name="project") + with ops.colocate_with(global_step): + global_step_op = state_ops.assign_add( + global_step, 1, name="global_step_increment") + return control_flow_ops.group(projection_op, global_step_op, name=name) + + +class AdditiveExternalRegretOptimizer(_ExternalRegretOptimizer): + """A `ConstrainedOptimizer` based on external-regret minimization. + + This `ConstrainedOptimizer` uses the given `tf.train.Optimizer`s to jointly + minimize over the model parameters, and maximize over Lagrange multipliers, + with the latter maximization using additive updates and an algorithm that + minimizes external regret. + + For more specifics, please refer to: + + > Cotter, Jiang and Sridharan. "Two-Player Games for Efficient Non-Convex + > Constrained Optimization". 
+ > [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500) + + The formulation used by this optimizer--which is simply the usual Lagrangian + formulation--can be found in Definition 1, and is discussed in Section 3. It + is most similar to Algorithm 3 in Appendix C.3, with the two differences being + that it uses proxy constraints (if they're provided) in the update of the + model parameters, and uses `tf.train.Optimizer`s, instead of SGD, for the + "inner" updates. + """ + + def __init__(self, + optimizer, + constraint_optimizer=None, + maximum_multiplier_radius=None): + """Constructs a new `AdditiveExternalRegretOptimizer`. + + Args: + optimizer: tf.train.Optimizer, used to optimize the objective and + proxy_constraints portion of ConstrainedMinimizationProblem. If + constraint_optimizer is not provided, this will also be used to optimize + the Lagrange multipliers. + constraint_optimizer: optional tf.train.Optimizer, used to optimize the + Lagrange multipliers. + maximum_multiplier_radius: float, an optional upper bound to impose on the + sum of the Lagrange multipliers. + + Returns: + A new `AdditiveExternalRegretOptimizer`. + + Raises: + ValueError: If the maximum_multiplier_radius parameter is nonpositive. + """ + super(AdditiveExternalRegretOptimizer, self).__init__( + optimizer=optimizer, constraint_optimizer=constraint_optimizer) + + if maximum_multiplier_radius is not None and maximum_multiplier_radius <= 0: + raise ValueError("maximum_multiplier_radius must be strictly positive") + + self._maximum_multiplier_radius = maximum_multiplier_radius + + def _initial_state(self, num_constraints): + # For an AdditiveExternalRegretOptimizer, the internal state is simply a + # tensor of Lagrange multipliers with shape (m,), where m is the number of + # constraints.
+ return standard_ops.zeros((num_constraints,), dtype=dtypes.float32) + + def _lagrange_multipliers(self, state): + return state + + def _constraint_grad_and_var(self, state, gradient): + # TODO(acotter): tf.colocate_with(), if colocate_gradients_with_ops is True? + return (-gradient, state) + + def _projection_op(self, state, name=None): + with ops.colocate_with(state): + if self._maximum_multiplier_radius: + projected_multipliers = _project_multipliers_wrt_euclidean_norm( + state, self._maximum_multiplier_radius) + else: + projected_multipliers = standard_ops.maximum(state, 0.0) + return state_ops.assign(state, projected_multipliers, name=name) diff --git a/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer_test.py b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer_test.py new file mode 100644 index 0000000000..9b4bf62710 --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer_test.py @@ -0,0 +1,136 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class AdditiveExternalRegretOptimizerWrapper(
    external_regret_optimizer.AdditiveExternalRegretOptimizer):
  """Test-only subclass that remembers the Lagrange multipliers.

  Behaves exactly like AdditiveExternalRegretOptimizer, but every call to
  _lagrange_multipliers() also stores its result, so that tests can read the
  multipliers back through the `lagrange_multipliers` property and compare them
  against expected values.
  """

  def __init__(self,
               optimizer,
               constraint_optimizer=None,
               maximum_multiplier_radius=None):
    """Takes the same arguments as AdditiveExternalRegretOptimizer.__init__."""
    parent = super(AdditiveExternalRegretOptimizerWrapper, self)
    parent.__init__(
        optimizer=optimizer,
        constraint_optimizer=constraint_optimizer,
        maximum_multiplier_radius=maximum_multiplier_radius)
    # Nothing is cached until _lagrange_multipliers() has been called.
    self._cached_lagrange_multipliers = None

  @property
  def lagrange_multipliers(self):
    """The most recently cached Lagrange multipliers (None if never cached)."""
    return self._cached_lagrange_multipliers

  def _lagrange_multipliers(self, state):
    """Delegates to the parent class, caching the result for the tests."""
    parent = super(AdditiveExternalRegretOptimizerWrapper, self)
    multipliers = parent._lagrange_multipliers(state)
    self._cached_lagrange_multipliers = multipliers
    return multipliers
atol=1e-6) + self.assertAllClose( + expected_projected_multipliers3, + projected_multipliers3, + rtol=0, + atol=1e-6) + + def test_additive_external_regret_optimizer(self): + """Tests that the Lagrange multipliers update as expected.""" + minimization_problem = test_util.ConstantMinimizationProblem( + np.array([0.6, -0.1, 0.4])) + optimizer = AdditiveExternalRegretOptimizerWrapper( + gradient_descent.GradientDescentOptimizer(1.0), + maximum_multiplier_radius=1.0) + train_op = optimizer.minimize_constrained(minimization_problem) + + expected_multipliers = [ + np.array([0.0, 0.0, 0.0]), + np.array([0.6, 0.0, 0.4]), + np.array([0.7, 0.0, 0.3]), + np.array([0.8, 0.0, 0.2]), + np.array([0.9, 0.0, 0.1]), + np.array([1.0, 0.0, 0.0]), + np.array([1.0, 0.0, 0.0]), + ] + + multipliers = [] + with self.test_session() as session: + session.run(standard_ops.global_variables_initializer()) + while len(multipliers) < len(expected_multipliers): + multipliers.append(session.run(optimizer.lagrange_multipliers)) + session.run(train_op) + + for expected, actual in zip(expected_multipliers, multipliers): + self.assertAllClose(expected, actual, rtol=0, atol=1e-6) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py new file mode 100644 index 0000000000..04014ab4ae --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py @@ -0,0 +1,595 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Defines `{Additive,Multiplicative}SwapRegretOptimizer`s. + +These optimizers minimize a `ConstrainedMinimizationProblem` by using a +swap-regret minimizing algorithm (either SGD or multiplicative weights) to learn +what weights should be associated with the objective function and constraints. +These algorithms do *not* use Lagrange multipliers, but the idea is similar. +The main differences between the formulation used here, and the standard +Lagrangian formulation, are that (i) the objective function is weighted, in +addition to the constraints, and (ii) we learn a matrix of weights, instead of a +vector. + +For the purposes of constrained optimization, at least in theory, +external-regret minimization suffices if the `ConstrainedMinimizationProblem` +we're optimizing doesn't have any `proxy_constraints`, while swap-regret +minimization should be used if `proxy_constraints` are present. + +For more specifics, please refer to: + +> Cotter, Jiang and Sridharan. "Two-Player Games for Efficient Non-Convex +> Constrained Optimization". +> [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500) + +The formulation used by both of the SwapRegretOptimizers can be found in +Definition 2, and is discussed in Section 4. The +`MultiplicativeSwapRegretOptimizer` is most similar to Algorithm 2 in Section 4, +with the difference being that it uses `tf.train.Optimizer`s, instead of SGD, +for the "inner" updates. 
The `AdditiveSwapRegretOptimizer` differs further in +that it performs additive (instead of multiplicative) updates of the stochastic +matrix. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc +import math + +import six + +from tensorflow.contrib.constrained_optimization.python import constrained_optimizer + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import standard_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import optimizer as train_optimizer + + +def _maximal_eigenvector_power_method(matrix, + epsilon=1e-6, + maximum_iterations=100): + """Returns the maximal right-eigenvector of `matrix` using the power method. + + Args: + matrix: 2D Tensor, the matrix of which we will find the maximal + right-eigenvector. + epsilon: nonnegative float, if two iterations of the power method differ (in + L2 norm) by no more than epsilon, we will terminate. + maximum_iterations: nonnegative int, if we perform this many iterations, we + will terminate. + + Result: + The maximal right-eigenvector of `matrix`. + + Raises: + ValueError: If the epsilon or maximum_iterations parameters violate their + bounds. 
+ """ + if epsilon <= 0.0: + raise ValueError("epsilon must be strictly positive") + if maximum_iterations <= 0: + raise ValueError("maximum_iterations must be strictly positive") + + def while_loop_condition(iteration, eigenvector, old_eigenvector): + """Returns false if the while loop should terminate.""" + not_done = (iteration < maximum_iterations) + not_converged = (standard_ops.norm(eigenvector - old_eigenvector) > epsilon) + return standard_ops.logical_and(not_done, not_converged) + + def while_loop_body(iteration, eigenvector, old_eigenvector): + """Performs one iteration of the power method.""" + del old_eigenvector # Needed by the condition, but not the body. + iteration += 1 + # We need to use tf.matmul() and tf.expand_dims(), instead of + # tf.tensordot(), since the former will infer the shape of the result, while + # the latter will not (tf.while_loop() needs the shapes). + new_eigenvector = standard_ops.matmul( + matrix, standard_ops.expand_dims(eigenvector, 1))[:, 0] + new_eigenvector /= standard_ops.norm(new_eigenvector) + return (iteration, new_eigenvector, eigenvector) + + iteration = standard_ops.constant(0) + eigenvector = standard_ops.ones_like(matrix[:, 0]) + eigenvector /= standard_ops.norm(eigenvector) + + # We actually want a do-while loop, so we explicitly call while_loop_body() + # once before tf.while_loop(). + iteration, eigenvector, old_eigenvector = while_loop_body( + iteration, eigenvector, eigenvector) + iteration, eigenvector, old_eigenvector = control_flow_ops.while_loop( + while_loop_condition, + while_loop_body, + loop_vars=(iteration, eigenvector, old_eigenvector), + name="power_method") + + return eigenvector + + +def _project_stochastic_matrix_wrt_euclidean_norm(matrix): + """Projects its argument onto the set of left-stochastic matrices. + + This algorithm is O(n^3) at worst, where `matrix` is n*n. 
It can be done in + O(n^2 * log(n)) time by sorting each column (and maybe better with a different + algorithm), but the algorithm implemented here is easier to implement in + TensorFlow. + + Args: + matrix: 2d square tensor, the matrix to project. + + Returns: + The 2d square tensor that results from projecting `matrix` onto the set of + left-stochastic matrices w.r.t. the Euclidean norm applied column-wise + (i.e. the Frobenius norm). + + Raises: + ValueError: if the `matrix` tensor does not have a fully-known shape, or is + not two-dimensional and square. + """ + matrix_shape = matrix.get_shape() + if matrix_shape is None: + raise ValueError("matrix must have known shape") + if matrix_shape.ndims != 2: + raise ValueError( + "matrix must be two dimensional (instead is %d-dimensional)" % + matrix_shape.ndims) + if matrix_shape[0] != matrix_shape[1]: + raise ValueError("matrix must be be square (instead has shape (%d,%d))" % + (matrix_shape[0], matrix_shape[1])) + dimension = matrix_shape[0].value + if dimension is None: + raise ValueError("matrix must have fully-known shape") + + def while_loop_condition(iteration, matrix, inactive, old_inactive): + """Returns false if the while loop should terminate.""" + del matrix # Needed by the body, but not the condition. + not_done = (iteration < dimension) + not_converged = standard_ops.reduce_any( + standard_ops.not_equal(inactive, old_inactive)) + return standard_ops.logical_and(not_done, not_converged) + + def while_loop_body(iteration, matrix, inactive, old_inactive): + """Performs one iteration of the projection.""" + del old_inactive # Needed by the condition, but not the body. 
@six.add_metaclass(abc.ABCMeta)
class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
  """Base class representing a `_SwapRegretOptimizer`.

  This class contains most of the logic for performing constrained optimization,
  minimizing swap regret for the constraints player. What it *doesn't* do is
  keep track of the internal state (the stochastic matrix). Instead, the state
  is accessed via the _initial_state(), _stochastic_matrix(),
  _constraint_grad_and_var() and _projection_op() methods.

  The reason for this is that we want to make it easy to implement different
  representations of the internal state. For example, for additive updates, it's
  most natural to store the stochastic matrix directly, whereas for
  multiplicative updates, it's most natural to store its element-wise logarithm.

  For more specifics, please refer to:

  > Cotter, Jiang and Sridharan. "Two-Player Games for Efficient Non-Convex
  > Constrained Optimization".
  > [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500)

  The formulation used by `_SwapRegretOptimizer`s can be found in Definition 2,
  and is discussed in Section 4. Such optimizers are most similar to Algorithm
  2 in Section 4. Most notably, the internal state is a left-stochastic matrix
  of shape (m+1,m+1), where m is the number of constraints.
  """

  def __init__(self, optimizer, constraint_optimizer=None):
    """Constructs a new `_SwapRegretOptimizer`.

    The difference between `optimizer` and `constraint_optimizer` (if the latter
    is provided) is that the former is used for learning the model parameters,
    while the latter is used for the update to the constraint/objective weight
    matrix (the analogue of Lagrange multipliers). If no `constraint_optimizer`
    is provided, then `optimizer` is used for both.

    Args:
      optimizer: tf.train.Optimizer, used to optimize the objective and
        proxy_constraints portion of ConstrainedMinimizationProblem. If
        constraint_optimizer is not provided, this will also be used to optimize
        the Lagrange multiplier analogues.
      constraint_optimizer: optional tf.train.Optimizer, used to optimize the
        Lagrange multiplier analogues.

    Returns:
      A new `_SwapRegretOptimizer`.
    """
    super(_SwapRegretOptimizer, self).__init__(optimizer=optimizer)
    self._constraint_optimizer = constraint_optimizer

  @property
  def constraint_optimizer(self):
    """Returns the `tf.train.Optimizer` used for the matrix."""
    return self._constraint_optimizer

  @abc.abstractmethod
  def _initial_state(self, num_constraints):
    """Returns the initial value of the internal state variable."""
    pass

  @abc.abstractmethod
  def _stochastic_matrix(self, state):
    """Returns the stochastic matrix represented by `state`."""
    pass

  def _distribution(self, state):
    """Returns a probability vector derived from the stochastic matrix.

    The vector is the power-method approximation of the maximal
    right-eigenvector of the stochastic matrix, made nonnegative and rescaled
    to sum to one.
    """
    distribution = _maximal_eigenvector_power_method(
        self._stochastic_matrix(state))
    distribution = standard_ops.abs(distribution)
    distribution /= standard_ops.reduce_sum(distribution)
    return distribution

  @abc.abstractmethod
  def _constraint_grad_and_var(self, state, gradient):
    """Returns the (gradient, variable) pair used to update `state`."""
    pass

  @abc.abstractmethod
  def _projection_op(self, state, name=None):
    """Returns an op that projects `state` back onto its feasible set."""
    pass

  def minimize_constrained(self,
                           minimization_problem,
                           global_step=None,
                           var_list=None,
                           gate_gradients=train_optimizer.Optimizer.GATE_OP,
                           aggregation_method=None,
                           colocate_gradients_with_ops=False,
                           name=None,
                           grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.

    The `optimizer` constructor parameter will be used to update the model
    parameters, while the constraint/objective weight matrix (the analogue of
    Lagrange multipliers) will be updated using `constraint_optimizer` (if
    provided) or `optimizer` (if not). Whether the matrix updates are additive
    or multiplicative depends on the derived class.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      TensorFlow Op.
    """
    objective = minimization_problem.objective

    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
      proxy_constraints = constraints
    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=(num_constraints,))

    # We use a lambda to initialize the state so that, if this function call is
    # inside the scope of a tf.control_dependencies() block, the dependencies
    # will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    # Index 0 corresponds to the objective, the remaining indices to the
    # constraints.
    zero_and_constraints = standard_ops.concat(
        (standard_ops.zeros((1,)), constraints), axis=0)
    objective_and_proxy_constraints = standard_ops.concat(
        (standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0)

    distribution = self._distribution(state)
    loss = standard_ops.tensordot(distribution, objective_and_proxy_constraints,
                                  1)
    # Outer product of the (zero, constraints) vector with the distribution.
    matrix_gradient = standard_ops.matmul(
        standard_ops.expand_dims(zero_and_constraints, 1),
        standard_ops.expand_dims(distribution, 0))

    # The model-parameter gradients and the state's (gradient, variable) pair
    # are needed regardless of which optimizer applies the state update, so
    # they are computed once here.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    matrix_grad_and_var = self._constraint_grad_and_var(state, matrix_gradient)

    update_ops = []
    if self.constraint_optimizer is None:
      # If we don't have a separate constraint_optimizer, then we use
      # self._optimizer for both the update of the model parameters, and that of
      # the internal state.
      grads_and_vars.append(matrix_grad_and_var)
      update_ops.append(
          self.optimizer.apply_gradients(grads_and_vars, name="update"))
    else:
      # If we have a separate constraint_optimizer, then we use self._optimizer
      # for the update of the model parameters, and self._constraint_optimizer
      # for that of the internal state.
      matrix_grads_and_vars = [matrix_grad_and_var]

      # Make both updates wait for every gradient to be computed.
      gradients = [
          gradient for gradient, _ in grads_and_vars + matrix_grads_and_vars
          if gradient is not None
      ]
      with ops.control_dependencies(gradients):
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
        update_ops.append(
            self.constraint_optimizer.apply_gradients(
                matrix_grads_and_vars, name="optimizer_state_update"))

    with ops.control_dependencies(update_ops):
      if global_step is None:
        # If we don't have a global step, just project, and we're done.
        return self._projection_op(state, name=name)
      else:
        # If we have a global step, then we need to increment it in addition to
        # projecting.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
          global_step_op = state_ops.assign_add(
              global_step, 1, name="global_step_increment")
        return control_flow_ops.group(projection_op, global_step_op, name=name)
+ + This `ConstrainedOptimizer` uses the given `tf.train.Optimizer`s to jointly + minimize over the model parameters, and maximize over constraint/objective + weight matrix (the analogue of Lagrange multipliers), with the latter + maximization using additive updates and an algorithm that minimizes swap + regret. + + For more specifics, please refer to: + + > Cotter, Jiang and Sridharan. "Two-Player Games for Efficient Non-Convex + > Constrained Optimization". + > [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500) + + The formulation used by this optimizer can be found in Definition 2, and is + discussed in Section 4. It is most similar to Algorithm 2 in Section 4, with + the differences being that it uses `tf.train.Optimizer`s, instead of SGD, for + the "inner" updates, and performs additive (instead of multiplicative) updates + of the stochastic matrix. + """ + + def __init__(self, optimizer, constraint_optimizer=None): + """Constructs a new `AdditiveSwapRegretOptimizer`. + + Args: + optimizer: tf.train.Optimizer, used to optimize the objective and + proxy_constraints portion of ConstrainedMinimizationProblem. If + constraint_optimizer is not provided, this will also be used to optimize + the Lagrange multiplier analogues. + constraint_optimizer: optional tf.train.Optimizer, used to optimize the + Lagrange multiplier analogues. + + Returns: + A new `AdditiveSwapRegretOptimizer`. + """ + # TODO(acotter): add a parameter determining the initial values of the + # matrix elements (like initial_multiplier_radius in + # MultiplicativeSwapRegretOptimizer). + super(AdditiveSwapRegretOptimizer, self).__init__( + optimizer=optimizer, constraint_optimizer=constraint_optimizer) + + def _initial_state(self, num_constraints): + # For an AdditiveSwapRegretOptimizer, the internal state is a tensor of + # shape (m+1,m+1), where m is the number of constraints, representing a + # left-stochastic matrix. 
class MultiplicativeSwapRegretOptimizer(_SwapRegretOptimizer):
  """A `ConstrainedOptimizer` based on swap-regret minimization.

  This `ConstrainedOptimizer` uses the given `tf.train.Optimizer`s to jointly
  minimize over the model parameters, and maximize over constraint/objective
  weight matrix (the analogue of Lagrange multipliers), with the latter
  maximization using multiplicative updates and an algorithm that minimizes swap
  regret.

  For more specifics, please refer to:

  > Cotter, Jiang and Sridharan. "Two-Player Games for Efficient Non-Convex
  > Constrained Optimization".
  > [https://arxiv.org/abs/1804.06500](https://arxiv.org/abs/1804.06500)

  The formulation used by this optimizer can be found in Definition 2, and is
  discussed in Section 4. It is most similar to Algorithm 2 in Section 4, with
  the difference being that it uses `tf.train.Optimizer`s, instead of SGD, for
  the "inner" updates.
  """

  def __init__(self,
               optimizer,
               constraint_optimizer=None,
               minimum_multiplier_radius=1e-3,
               initial_multiplier_radius=None):
    """Constructs a new `MultiplicativeSwapRegretOptimizer`.

    Args:
      optimizer: tf.train.Optimizer, used to optimize the objective and
        proxy_constraints portion of ConstrainedMinimizationProblem. If
        constraint_optimizer is not provided, this will also be used to optimize
        the Lagrange multiplier analogues.
      constraint_optimizer: optional tf.train.Optimizer, used to optimize the
        Lagrange multiplier analogues.
      minimum_multiplier_radius: float in the range (0,1), each element of the
        matrix will be lower bounded by `minimum_multiplier_radius` divided by
        one plus the number of constraints.
      initial_multiplier_radius: float in the range
        [minimum_multiplier_radius,1], the initial value of each element of the
        matrix associated with a constraint (i.e. excluding those elements
        associated with the objective) will be `initial_multiplier_radius`
        divided by one plus the number of constraints. Defaults to the value of
        `minimum_multiplier_radius`.

    Returns:
      A new `MultiplicativeSwapRegretOptimizer`.

    Raises:
      ValueError: If the two radius parameters are inconsistent.
    """
    super(MultiplicativeSwapRegretOptimizer, self).__init__(
        optimizer=optimizer, constraint_optimizer=constraint_optimizer)

    if (minimum_multiplier_radius <= 0.0) or (minimum_multiplier_radius >= 1.0):
      raise ValueError("minimum_multiplier_radius must be in the range (0,1)")
    if initial_multiplier_radius is None:
      initial_multiplier_radius = minimum_multiplier_radius
    elif (initial_multiplier_radius <
          minimum_multiplier_radius) or (initial_multiplier_radius > 1.0):
      # The upper bound must be checked on the *initial* radius: the minimum
      # radius is already known to be below 1 at this point, and an initial
      # radius above 1 would make _initial_state() take the logarithm of a
      # non-positive number.
      raise ValueError("initial_multiplier_radius must be in the range "
                       "[minimum_multiplier_radius,1]")

    self._minimum_multiplier_radius = minimum_multiplier_radius
    self._initial_multiplier_radius = initial_multiplier_radius

  def _initial_state(self, num_constraints):
    """Returns the initial state: the log of a (m+1,m+1) stochastic matrix."""
    # For a MultiplicativeSwapRegretOptimizer, the internal state is a tensor of
    # shape (m+1,m+1), where m is the number of constraints, representing the
    # element-wise logarithm of a left-stochastic matrix.
    dimension = num_constraints + 1
    # Initialize by putting as much weight as possible on the objective, and as
    # little as possible on the constraints.
    log_initial_one = math.log(1.0 - (self._initial_multiplier_radius *
                                      (dimension - 1) / (dimension)))
    log_initial_zero = math.log(self._initial_multiplier_radius / dimension)
    return standard_ops.concat(
        (standard_ops.constant(
            log_initial_one, dtype=dtypes.float32, shape=(1, dimension)),
         standard_ops.constant(
             log_initial_zero,
             dtype=dtypes.float32,
             shape=(dimension - 1, dimension))),
        axis=0)

  def _stochastic_matrix(self, state):
    """Returns the stochastic matrix, i.e. the element-wise exp of `state`."""
    return standard_ops.exp(state)

  def _constraint_grad_and_var(self, state, gradient):
    """Returns the (gradient, variable) pair used to update the state.

    The gradient is negated because the weight matrix is maximized, while the
    `tf.train.Optimizer` applying the update minimizes.
    """
    # TODO(acotter): tf.colocate_with(), if colocate_gradients_with_ops is True?
    return (-gradient, state)

  def _projection_op(self, state, name=None):
    """Returns an op that renormalizes `state` and applies the lower bound."""
    with ops.colocate_with(state):
      # Gets the dimension of the state (num_constraints + 1)--all of these
      # assertions are of things that should be impossible, since the state
      # passed into this method will have the same shape as that returned by
      # _initial_state().
      state_shape = state.get_shape()
      assert state_shape is not None
      assert state_shape.ndims == 2
      assert state_shape[0] == state_shape[1]
      dimension = state_shape[0].value
      assert dimension is not None

      # Log of the smallest value any matrix element is allowed to take.
      minimum_log_multiplier = standard_ops.log(
          self._minimum_multiplier_radius / standard_ops.to_float(dimension))

      return state_ops.assign(
          state,
          standard_ops.maximum(
              _project_log_stochastic_matrix_wrt_kl_divergence(state),
              minimum_log_multiplier),
          name=name)
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for constrained_optimization.python.swap_regret_optimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.constrained_optimization.python import swap_regret_optimizer +from tensorflow.contrib.constrained_optimization.python import test_util + +from tensorflow.python.ops import standard_ops +from tensorflow.python.platform import test +from tensorflow.python.training import gradient_descent + + +class AdditiveSwapRegretOptimizerWrapper( + swap_regret_optimizer.AdditiveSwapRegretOptimizer): + """Testing wrapper class around AdditiveSwapRegretOptimizer. + + This class is identical to AdditiveSwapRegretOptimizer, except that it caches + the internal optimization state when _stochastic_matrix() is called, so that + we can test that the stochastic matrices take on their expected values. 
class MultiplicativeSwapRegretOptimizerWrapper(
    swap_regret_optimizer.MultiplicativeSwapRegretOptimizer):
  """Testing wrapper class around MultiplicativeSwapRegretOptimizer.

  This class is identical to MultiplicativeSwapRegretOptimizer, except that it
  caches the internal optimization state when _stochastic_matrix() is called, so
  that we can test that the stochastic matrices take on their expected values.
  """

  def __init__(self,
               optimizer,
               constraint_optimizer=None,
               minimum_multiplier_radius=None,
               initial_multiplier_radius=None):
    """Same as MultiplicativeSwapRegretOptimizer.__init__().

    Args:
      optimizer: forwarded to MultiplicativeSwapRegretOptimizer.__init__().
      constraint_optimizer: forwarded to
        MultiplicativeSwapRegretOptimizer.__init__().
      minimum_multiplier_radius: forwarded to
        MultiplicativeSwapRegretOptimizer.__init__(). If None (the default),
        1e-3 is used.
      initial_multiplier_radius: forwarded to
        MultiplicativeSwapRegretOptimizer.__init__().
    """
    # Honor an explicitly provided minimum radius instead of silently replacing
    # it, while keeping 1e-3 as the value used when none is given.
    if minimum_multiplier_radius is None:
      minimum_multiplier_radius = 1e-3
    super(MultiplicativeSwapRegretOptimizerWrapper, self).__init__(
        optimizer=optimizer,
        constraint_optimizer=constraint_optimizer,
        minimum_multiplier_radius=minimum_multiplier_radius,
        initial_multiplier_radius=initial_multiplier_radius)
    # Nothing is cached until _stochastic_matrix() has been called.
    self._cached_stochastic_matrix = None

  @property
  def stochastic_matrix(self):
    """Returns the cached stochastic matrix."""
    return self._cached_stochastic_matrix

  def _stochastic_matrix(self, state):
    """Caches the internal state for testing."""
    self._cached_stochastic_matrix = super(
        MultiplicativeSwapRegretOptimizerWrapper,
        self)._stochastic_matrix(state)
    return self._cached_stochastic_matrix
+ matrix_eigenvector1 = np.tensordot(matrix1, eigenvector1, axes=1) + matrix_eigenvector2 = np.tensordot(matrix2, eigenvector2, axes=1) + self.assertAllClose(eigenvector1, matrix_eigenvector1, rtol=0, atol=1e-6) + self.assertAllClose(eigenvector2, matrix_eigenvector2, rtol=0, atol=1e-6) + + def test_project_stochastic_matrix_wrt_euclidean_norm(self): + """Tests Euclidean projection routine on some known values.""" + matrix = standard_ops.constant([[-0.1, -0.1, 0.4], [-0.8, 0.4, 1.2], + [-0.3, 0.1, 0.2]]) + expected_projected_matrix = np.array([[0.6, 0.1, 0.1], [0.0, 0.6, 0.9], + [0.4, 0.3, 0.0]]) + + with self.test_session() as session: + projected_matrix = session.run( + swap_regret_optimizer._project_stochastic_matrix_wrt_euclidean_norm( + matrix)) + + self.assertAllClose( + expected_projected_matrix, projected_matrix, rtol=0, atol=1e-6) + + def test_project_log_stochastic_matrix_wrt_kl_divergence(self): + """Tests KL-divergence projection routine on some known values.""" + matrix = standard_ops.constant([[0.2, 0.8, 0.6], [0.1, 0.2, 1.5], + [0.2, 1.0, 0.9]]) + expected_projected_matrix = np.array([[0.4, 0.4, 0.2], [0.2, 0.1, 0.5], + [0.4, 0.5, 0.3]]) + + with self.test_session() as session: + projected_matrix = session.run( + standard_ops.exp( + swap_regret_optimizer. + _project_log_stochastic_matrix_wrt_kl_divergence( + standard_ops.log(matrix)))) + + self.assertAllClose( + expected_projected_matrix, projected_matrix, rtol=0, atol=1e-6) + + def test_additive_swap_regret_optimizer(self): + """Tests that the stochastic matrices update as expected.""" + minimization_problem = test_util.ConstantMinimizationProblem( + np.array([0.6, -0.1, 0.4])) + optimizer = AdditiveSwapRegretOptimizerWrapper( + gradient_descent.GradientDescentOptimizer(1.0)) + train_op = optimizer.minimize_constrained(minimization_problem) + + # Calculated using a numpy+python implementation of the algorithm. 
+ expected_matrices = [ + np.array([[1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]), + np.array([[0.66666667, 1.0, 1.0, 1.0], [0.26666667, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], [0.06666667, 0.0, 0.0, 0.0]]), + np.array([[0.41666667, 0.93333333, 1.0, + 0.98333333], [0.46666667, 0.05333333, 0.0, + 0.01333333], [0.0, 0.0, 0.0, 0.0], + [0.11666667, 0.01333333, 0.0, 0.00333333]]), + ] + + matrices = [] + with self.test_session() as session: + session.run(standard_ops.global_variables_initializer()) + while len(matrices) < len(expected_matrices): + matrices.append(session.run(optimizer.stochastic_matrix)) + session.run(train_op) + + for expected, actual in zip(expected_matrices, matrices): + self.assertAllClose(expected, actual, rtol=0, atol=1e-6) + + def test_multiplicative_swap_regret_optimizer(self): + """Tests that the stochastic matrices update as expected.""" + minimization_problem = test_util.ConstantMinimizationProblem( + np.array([0.6, -0.1, 0.4])) + optimizer = MultiplicativeSwapRegretOptimizerWrapper( + gradient_descent.GradientDescentOptimizer(1.0), + initial_multiplier_radius=0.8) + train_op = optimizer.minimize_constrained(minimization_problem) + + # Calculated using a numpy+python implementation of the algorithm. 
+ expected_matrices = [ + np.array([[0.4, 0.4, 0.4, 0.4], [0.2, 0.2, 0.2, 0.2], + [0.2, 0.2, 0.2, 0.2], [0.2, 0.2, 0.2, 0.2]]), + np.array([[0.36999014, 0.38528351, 0.38528351, 0.38528351], [ + 0.23517483, 0.21720297, 0.21720297, 0.21720297 + ], [0.17774131, 0.18882719, 0.18882719, 0.18882719], + [0.21709373, 0.20868632, 0.20868632, 0.20868632]]), + np.array([[0.33972109, 0.36811863, 0.37118462, 0.36906575], [ + 0.27114826, 0.23738228, 0.23376693, 0.23626491 + ], [0.15712313, 0.17641793, 0.17858959, 0.17708679], + [0.23200752, 0.21808115, 0.21645886, 0.21758255]]), + ] + + matrices = [] + with self.test_session() as session: + session.run(standard_ops.global_variables_initializer()) + while len(matrices) < len(expected_matrices): + matrices.append(session.run(optimizer.stochastic_matrix)) + session.run(train_op) + + for expected, actual in zip(expected_matrices, matrices): + self.assertAllClose(expected, actual, rtol=0, atol=1e-6) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/constrained_optimization/python/test_util.py b/tensorflow/contrib/constrained_optimization/python/test_util.py new file mode 100644 index 0000000000..704b36ca4c --- /dev/null +++ b/tensorflow/contrib/constrained_optimization/python/test_util.py @@ -0,0 +1,58 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Contains helpers used by tests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.constrained_optimization.python import constrained_minimization_problem + +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import standard_ops + + +class ConstantMinimizationProblem( + constrained_minimization_problem.ConstrainedMinimizationProblem): + """A `ConstrainedMinimizationProblem` with constant constraint violations. + + This minimization problem is intended for use in performing simple tests of + the Lagrange multiplier (or equivalent) update in the optimizers. There is a + one-element "dummy" model parameter, but it should be ignored. + """ + + def __init__(self, constraints): + """Constructs a new `ConstantMinimizationProblem`. + + Args: + constraints: 1d numpy array, the constant constraint violations. + + Returns: + A new `ConstantMinimizationProblem`. + """ + # We make a fake 1-parameter linear objective so that we don't get a "no + # variables to optimize" error. 
+ self._objective = standard_ops.Variable(0.0, dtype=dtypes.float32) + self._constraints = standard_ops.constant(constraints, dtype=dtypes.float32) + + @property + def objective(self): + """Returns the objective function.""" + return self._objective + + @property + def constraints(self): + """Returns the constant constraint violations.""" + return self._constraints diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 7b508f87ab..677ea65edd 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -63,6 +63,7 @@ COMMON_PIP_DEPS = [ "//tensorflow/contrib/autograph/pyct/static_analysis:static_analysis", "//tensorflow/contrib/boosted_trees:boosted_trees_pip", "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", + "//tensorflow/contrib/constrained_optimization:constrained_optimization_pip", "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", "//tensorflow/contrib/data/python/ops:contrib_op_loader", "//tensorflow/contrib/eager/python/examples:examples_pip", -- GitLab From 762fa5f6ead8f662e5cc14420293cb369f2b9615 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 23 Apr 2018 15:57:16 -0700 Subject: [PATCH 174/434] FakeQuant operations before ReLUs (occurs after bypass nodes) aren't needed. 
PiperOrigin-RevId: 193999591 --- .../contrib/quantize/python/quantize.py | 68 ++++++++++++------- .../quantize/python/quantize_graph_test.py | 14 ---- .../contrib/quantize/python/quantize_test.py | 57 ++++++++++++---- 3 files changed, 87 insertions(+), 52 deletions(-) diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index d2d0426d23..efc1a94b3c 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -133,19 +133,27 @@ def Quantize(graph, bits=activation_bits, producer_scope=scope, consumer_scope=scope) - _InsertQuantOp( - add_context, - 'add_quant', - layer_match.bypass_op, - input_to_ops_map.ConsumerOperations(layer_match.bypass_op), - is_training, - moving_avg=True, - ema_decay=ema_decay, - quant_delay=quant_delay, - vars_collection=vars_collection, - bits=activation_bits, - producer_scope=scope, - consumer_scope=scope) + # Make sure the op following this isn't an activation. In which case, we + # shouldn't quantize it, since the activation will be Fused into the + # Add at inference time. + consumers = input_to_ops_map.ConsumerOperations(layer_match.bypass_op) + if any([consumer.type in _ACTIVATION_TYPES for consumer in consumers]): + logging.info('Skipping %s, because its followed by an activation.', + layer_match.bypass_op.name) + else: + _InsertQuantOp( + add_context, + 'add_quant', + layer_match.bypass_op, + input_to_ops_map.ConsumerOperations(layer_match.bypass_op), + is_training, + moving_avg=True, + ema_decay=ema_decay, + quant_delay=quant_delay, + vars_collection=vars_collection, + bits=activation_bits, + producer_scope=scope, + consumer_scope=scope) # Quantize bypass ops that occur after the activation. 
if layer_match.post_activation_bypass_op is not None: @@ -153,19 +161,27 @@ def Quantize(graph, r'^(.*)/([^/]+)', layer_match.post_activation_bypass_op.name).group(1) # If `scope` is given, only quantize it if the producer is in the right # scope. - _InsertQuantOp( - post_activation_bypass_context, - 'post_activation_bypass_quant', - layer_match.post_activation_bypass_op, - input_to_ops_map.ConsumerOperations( - layer_match.post_activation_bypass_op), - is_training, - moving_avg=True, - ema_decay=ema_decay, - quant_delay=quant_delay, - vars_collection=vars_collection, - bits=activation_bits, - producer_scope=scope) + # Make sure the op following this isn't an activation. In which case, we + # shouldn't quantize it, since the activation will be Fused into the + # Add at inference time. + consumers = input_to_ops_map.ConsumerOperations( + layer_match.post_activation_bypass_op) + if any([consumer.type in _ACTIVATION_TYPES for consumer in consumers]): + logging.info('Skipping %s, because its followed by an activation.', + layer_match.post_activation_bypass_op.name) + else: + _InsertQuantOp( + post_activation_bypass_context, + 'post_activation_bypass_quant', + layer_match.post_activation_bypass_op, + consumers, + is_training, + moving_avg=True, + ema_decay=ema_decay, + quant_delay=quant_delay, + vars_collection=vars_collection, + bits=activation_bits, + producer_scope=scope) def _FindLayersToQuantize(graph): diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py index caf8ff28d5..54faf582f1 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph_test.py +++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py @@ -113,20 +113,6 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): # Ensure that variables were added. 
self.assertTrue(len(orig_variable_names) < len(q_variables)) - def testWithPreActivationBypass(self): - self._RunTestOverAllRewrites(self._TestWithPreActivationBypass) - - def _TestWithPreActivationBypass(self, rewrite_fn): - # Tests that the default graph is correctly used when no args are provided - # to rewrite_fn. - with ops.Graph().as_default() as g: - self._ConvLayer(pre_activation_bypass=True, scope='scope1') - rewrite_fn() - - op_names = [op.name for op in g.get_operations()] - self.assertTrue( - any('scope1/add_quant/' in name for name in op_names)) - def testWithPostActivationBypass(self): self._RunTestOverAllRewrites(self._TestWithPostActivationBypass) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index d37c83d683..5e479f3946 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -82,9 +82,22 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) quantization_node_name = 'FakeQuantWithMinMaxVars' - add_quant = graph.get_operation_by_name('test/add_quant/' + - quantization_node_name) - self.assertEqual(add_quant.type, quantization_node_name) + conv_quant = graph.get_operation_by_name('test/test/conv_quant/' + + quantization_node_name) + self.assertEqual(conv_quant.type, quantization_node_name) + + # Scan through all FakeQuant operations, ensuring that the activation + # isn't in the consumers of the operation. Since activations are folded + # the preceding operation during inference, the FakeQuant operation after + # the activation is all that is needed. 
+ for op in graph.get_operations(): + if op.type == quantization_node_name: + quant_op = graph.get_operation_by_name(op.name) + consumers = [] + for output in quant_op.outputs: + consumers.extend(output.consumers()) + + self.assertNotIn('test/identity', [c.name for c in consumers]) def testInsertQuantOpForAddAfterSeparableConv2d(self): self._RunTestOverParameters( @@ -109,9 +122,20 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) quantization_node_name = 'FakeQuantWithMinMaxVars' - add_quant = graph.get_operation_by_name('test/add_quant/' + - quantization_node_name) - self.assertEqual(add_quant.type, quantization_node_name) + conv_quant = graph.get_operation_by_name('test/test/conv_quant/' + + quantization_node_name) + self.assertEqual(conv_quant.type, quantization_node_name) + + for op in graph.get_operations(): + if op.type == quantization_node_name: + quant_op = graph.get_operation_by_name(op.name) + # Scan through all FakeQuant operations, ensuring that the activation + # identity op isn't in the consumers of the operation. + consumers = [] + for output in quant_op.outputs: + consumers.extend(output.consumers()) + + self.assertNotIn('test/identity', [c.name for c in consumers]) def testFinalLayerQuantized(self): self._RunTestOverParameters(self._TestFinalLayerQuantized) @@ -153,12 +177,21 @@ class QuantizeTest(test_util.TensorFlowTestCase): activation_fn=array_ops.identity, scope='test/test') bypass_tensor = math_ops.add(conv, input2, name='test/add') - _ = array_ops.identity(bypass_tensor, name='test/output') + # The output of the post_activation bypass will be another layer. 
+ _ = conv2d( + bypass_tensor, + 32, [5, 5], + stride=2, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=array_ops.identity, + scope='test/unused') quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) - # Ensure that the bypass node is preceded and followed by - # FakeQuantWithMinMaxVars operations. + # Ensure that the bypass node is preceded by and followed by a + # FakeQuantWithMinMaxVar operation, since the output of the Add isn't an + # activation. self.assertTrue('FakeQuantWithMinMaxVars' in [c.type for c in bypass_tensor.consumers()]) self.assertTrue('FakeQuantWithMinMaxVars' in @@ -198,9 +231,9 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) - # Ensure that the bypass node is preceded and followed by - # FakeQuantWithMinMaxVars operations. - self.assertTrue('FakeQuantWithMinMaxVars' in + # Ensure that the bypass node is preceded by a FakeQuantWithMinMaxVar + # operation, and NOT followed by one. + self.assertTrue('FakeQuantWithMinMaxVars' not in [c.type for c in bypass_tensor.consumers()]) self.assertTrue('FakeQuantWithMinMaxVars' in [i.op.type for i in bypass_tensor.op.inputs]) -- GitLab From 5809ad4436863ac82279c66d6cff6a4bffd77878 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 23 Apr 2018 16:27:00 -0700 Subject: [PATCH 175/434] Add `static_state_saving_rnn` back to the `nn` module. 
PiperOrigin-RevId: 194003971 --- tensorflow/python/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index 13f8420a67..c1702ae13c 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -160,6 +160,7 @@ nn.dynamic_rnn = rnn.dynamic_rnn nn.static_rnn = rnn.static_rnn nn.raw_rnn = rnn.raw_rnn nn.bidirectional_dynamic_rnn = rnn.bidirectional_dynamic_rnn +nn.static_state_saving_rnn = rnn.static_state_saving_rnn nn.rnn_cell = rnn_cell # Symbols whitelisted for export without documentation. -- GitLab From ba39780114c648445d3285550bf7f5c1e9e8a251 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 16:29:27 -0700 Subject: [PATCH 176/434] Avoid inlining the split handler functions as it slows down the trainer startup significantly. PiperOrigin-RevId: 194004319 --- .../learner/batch/ordinal_split_handler.py | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py index 7df514cd20..9d6cc9245a 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py @@ -417,9 +417,18 @@ class SparseSplitHandler(InequalitySplitHandler): return (are_splits_ready, partition_ids, gains, split_infos) -@function.Defun(dtypes.bool, dtypes.bool, dtypes.float32, dtypes.float32, - dtypes.int32, dtypes.float32, dtypes.float32, dtypes.float32, - dtypes.float32, dtypes.float32) +@function.Defun( + dtypes.bool, + dtypes.bool, + dtypes.float32, + dtypes.float32, + dtypes.int32, + dtypes.float32, + dtypes.float32, + dtypes.float32, + dtypes.float32, + dtypes.float32, + noinline=True) def dense_make_stats_update(is_active, are_buckets_ready, float_column, quantile_buckets, 
example_partition_ids, gradients, hessians, weights, empty_gradients, empty_hessians): @@ -452,9 +461,20 @@ def dense_make_stats_update(is_active, are_buckets_ready, float_column, gradients, hessians) -@function.Defun(dtypes.bool, dtypes.bool, dtypes.int64, dtypes.float32, - dtypes.int64, dtypes.float32, dtypes.int32, dtypes.float32, - dtypes.float32, dtypes.float32, dtypes.float32, dtypes.float32) +@function.Defun( + dtypes.bool, + dtypes.bool, + dtypes.int64, + dtypes.float32, + dtypes.int64, + dtypes.float32, + dtypes.int32, + dtypes.float32, + dtypes.float32, + dtypes.float32, + dtypes.float32, + dtypes.float32, + noinline=True) def sparse_make_stats_update( is_active, are_buckets_ready, sparse_column_indices, sparse_column_values, sparse_column_shape, quantile_buckets, example_partition_ids, gradients, -- GitLab From a72155d58726d4dbb92d5d6b0f3290976bbdaa1c Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 23 Apr 2018 16:33:27 -0700 Subject: [PATCH 177/434] Small fast path for binary_op_wrapper PiperOrigin-RevId: 194004866 --- tensorflow/python/ops/math_ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 01d670ea2d..2b04866fef 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -965,7 +965,9 @@ def _OverrideBinaryOperatorHelper(func, op_name, clazz_object=ops.Tensor): def binary_op_wrapper(x, y): with ops.name_scope(None, op_name, [x, y]) as name: - if not isinstance(y, sparse_tensor.SparseTensor): + if isinstance(x, ops.Tensor) and isinstance(y, ops.Tensor): + return func(x, y, name=name) + elif not isinstance(y, sparse_tensor.SparseTensor): try: y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y") except TypeError: -- GitLab From 84c73c2b4d0318bfd78a53ab6051169795604650 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 23 Apr 2018 16:46:41 -0700 Subject: [PATCH 178/434] TFTS: Support exogenous 
features in ARRegressor They get flattened with the endogenous features as input to the model. Unlike endogenous features, they're specified for the whole window when making predictions. Adds an ARRegressor example which uses exogenous features. PiperOrigin-RevId: 194006630 --- .../timeseries/examples/known_anomaly.py | 75 +++++--- .../timeseries/examples/known_anomaly_test.py | 18 +- .../timeseries/python/timeseries/ar_model.py | 173 ++++++++++++++---- .../python/timeseries/ar_model_test.py | 8 +- .../python/timeseries/estimators.py | 11 +- .../python/timeseries/estimators_test.py | 48 +++-- 6 files changed, 255 insertions(+), 78 deletions(-) diff --git a/tensorflow/contrib/timeseries/examples/known_anomaly.py b/tensorflow/contrib/timeseries/examples/known_anomaly.py index e77628ddd3..71621abc71 100644 --- a/tensorflow/contrib/timeseries/examples/known_anomaly.py +++ b/tensorflow/contrib/timeseries/examples/known_anomaly.py @@ -41,17 +41,8 @@ _MODULE_PATH = path.dirname(__file__) _DATA_FILE = path.join(_MODULE_PATH, "data/changepoints.csv") -def train_and_evaluate_exogenous(csv_file_name=_DATA_FILE, train_steps=300): - """Training, evaluating, and predicting on a series with changepoints.""" - - # Indicate the format of our exogenous feature, in this case a string - # representing a boolean value. - string_feature = tf.feature_column.categorical_column_with_vocabulary_list( - key="is_changepoint", vocabulary_list=["no", "yes"]) - # Specify the way this feature is presented to the model, here using a one-hot - # encoding. - one_hot_feature = tf.feature_column.indicator_column( - categorical_column=string_feature) +def state_space_esitmator(exogenous_feature_columns): + """Constructs a StructuralEnsembleRegressor.""" def _exogenous_update_condition(times, features): del times # unused @@ -62,14 +53,48 @@ def train_and_evaluate_exogenous(csv_file_name=_DATA_FILE, train_steps=300): # no changepoint. 
return tf.equal(tf.squeeze(features["is_changepoint"], axis=-1), "yes") - estimator = tf.contrib.timeseries.StructuralEnsembleRegressor( - periodicities=12, - # Extract a smooth period by constraining the number of latent values - # being cycled between. - cycle_num_latent_values=3, - num_features=1, - exogenous_feature_columns=[one_hot_feature], - exogenous_update_condition=_exogenous_update_condition) + return ( + tf.contrib.timeseries.StructuralEnsembleRegressor( + periodicities=12, + # Extract a smooth period by constraining the number of latent values + # being cycled between. + cycle_num_latent_values=3, + num_features=1, + exogenous_feature_columns=exogenous_feature_columns, + exogenous_update_condition=_exogenous_update_condition), + # Use truncated backpropagation with a window size of 64, batching + # together 4 of these windows (random offsets) per training step. Training + # with exogenous features often requires somewhat larger windows. + 4, 64) + + +def autoregressive_esitmator(exogenous_feature_columns): + input_window_size = 8 + output_window_size = 2 + return ( + tf.contrib.timeseries.ARRegressor( + periodicities=12, + num_features=1, + input_window_size=input_window_size, + output_window_size=output_window_size, + exogenous_feature_columns=exogenous_feature_columns), + 64, input_window_size + output_window_size) + + +def train_and_evaluate_exogenous( + estimator_fn, csv_file_name=_DATA_FILE, train_steps=300): + """Training, evaluating, and predicting on a series with changepoints.""" + # Indicate the format of our exogenous feature, in this case a string + # representing a boolean value. + string_feature = tf.feature_column.categorical_column_with_vocabulary_list( + key="is_changepoint", vocabulary_list=["no", "yes"]) + # Specify the way this feature is presented to the model, here using a one-hot + # encoding. 
+ one_hot_feature = tf.feature_column.indicator_column( + categorical_column=string_feature) + + estimator, batch_size, window_size = estimator_fn( + exogenous_feature_columns=[one_hot_feature]) reader = tf.contrib.timeseries.CSVReader( csv_file_name, # Indicate the format of our CSV file. First we have two standard columns, @@ -85,10 +110,7 @@ def train_and_evaluate_exogenous(csv_file_name=_DATA_FILE, train_steps=300): # This CSV has a header line; here we just ignore it. skip_header_lines=1) train_input_fn = tf.contrib.timeseries.RandomWindowInputFn( - # Use truncated backpropagation with a window size of 64, batching - # together 4 of these windows (random offsets) per training step. Training - # with exogenous features often requires somewhat larger windows. - reader, batch_size=4, window_size=64) + reader, batch_size=batch_size, window_size=window_size) estimator.train(input_fn=train_input_fn, steps=train_steps) evaluation_input_fn = tf.contrib.timeseries.WholeDatasetInputFn(reader) evaluation = estimator.evaluate(input_fn=evaluation_input_fn, steps=1) @@ -145,7 +167,12 @@ def main(unused_argv): if not HAS_MATPLOTLIB: raise ImportError( "Please install matplotlib to generate a plot from this example.") - make_plot("Ignoring a known anomaly", *train_and_evaluate_exogenous()) + make_plot("Ignoring a known anomaly (state space)", + *train_and_evaluate_exogenous( + estimator_fn=state_space_esitmator)) + make_plot("Ignoring a known anomaly (autoregressive)", + *train_and_evaluate_exogenous( + estimator_fn=autoregressive_esitmator, train_steps=3000)) pyplot.show() diff --git a/tensorflow/contrib/timeseries/examples/known_anomaly_test.py b/tensorflow/contrib/timeseries/examples/known_anomaly_test.py index c3e307cad8..8c64f2e186 100644 --- a/tensorflow/contrib/timeseries/examples/known_anomaly_test.py +++ b/tensorflow/contrib/timeseries/examples/known_anomaly_test.py @@ -23,12 +23,24 @@ from tensorflow.contrib.timeseries.examples import known_anomaly from 
tensorflow.python.platform import test -class KnownAnaomalyExampleTest(test.TestCase): +class KnownAnomalyExampleTest(test.TestCase): - def test_shapes_and_variance_structural(self): + def test_shapes_and_variance_structural_ar(self): (times, observed, all_times, mean, upper_limit, lower_limit, anomaly_locations) = known_anomaly.train_and_evaluate_exogenous( - train_steps=50) + train_steps=1, estimator_fn=known_anomaly.autoregressive_esitmator) + self.assertAllEqual( + anomaly_locations, + [25, 50, 75, 100, 125, 150, 175, 249]) + self.assertAllEqual(all_times.shape, mean.shape) + self.assertAllEqual(all_times.shape, upper_limit.shape) + self.assertAllEqual(all_times.shape, lower_limit.shape) + self.assertAllEqual(times.shape, observed.shape) + + def test_shapes_and_variance_structural_ssm(self): + (times, observed, all_times, mean, upper_limit, lower_limit, + anomaly_locations) = known_anomaly.train_and_evaluate_exogenous( + train_steps=50, estimator_fn=known_anomaly.state_space_esitmator) self.assertAllEqual( anomaly_locations, [25, 50, 75, 100, 125, 150, 175, 249]) diff --git a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py index 4f6527a546..558d9480b4 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py @@ -60,7 +60,8 @@ class ARModel(model.TimeSeriesModel): num_features, num_time_buckets=10, loss=NORMAL_LIKELIHOOD_LOSS, - hidden_layer_sizes=None): + hidden_layer_sizes=None, + exogenous_feature_columns=None): """Constructs an auto-regressive model. Args: @@ -81,6 +82,11 @@ class ARModel(model.TimeSeriesModel): observations and predictions, while the training loss is computed on normalized data (if input statistics are available). hidden_layer_sizes: list of sizes of hidden layers. 
+ exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not part + of the series to be predicted. Passed to + `tf.feature_column.input_layer`. """ self.input_window_size = input_window_size self.output_window_size = output_window_size @@ -90,7 +96,12 @@ class ARModel(model.TimeSeriesModel): self.window_size = self.input_window_size + self.output_window_size self.loss = loss super(ARModel, self).__init__( - num_features=num_features) + num_features=num_features, + exogenous_feature_columns=exogenous_feature_columns) + if exogenous_feature_columns is not None: + self.exogenous_size = self._get_exogenous_embedding_shape()[-1] + else: + self.exogenous_size = 0 assert num_time_buckets > 0 self._buckets = int(num_time_buckets) if periodicities is None or not periodicities: @@ -110,7 +121,10 @@ class ARModel(model.TimeSeriesModel): # that the serving input_receiver_fn gets placeholder shapes correct. return (array_ops.zeros([self.input_window_size], dtype=dtypes.int64), array_ops.zeros( - [self.input_window_size, self.num_features], dtype=self.dtype)) + [self.input_window_size, self.num_features], dtype=self.dtype), + array_ops.zeros( + [self.input_window_size, self.exogenous_size], + dtype=self.dtype)) # TODO(allenl,agarwal): Support sampling for AR. def random_model_parameters(self, seed=None): @@ -163,7 +177,7 @@ class ARModel(model.TimeSeriesModel): activations.append((activation, activation_size)) return activations - def prediction_ops(self, times, values): + def prediction_ops(self, times, values, exogenous_regressors): """Compute model predictions given input data. Args: @@ -173,6 +187,8 @@ class ARModel(model.TimeSeriesModel): prediction times. values: A [batch size, self.input_window_size, self.num_features] Tensor with input features. 
+ exogenous_regressors: A [batch size, self.window_size, + self.exogenous_size] Tensor with exogenous features. Returns: Tuple (predicted_mean, predicted_covariance), where each element is a Tensor with shape [batch size, self.output_window_size, @@ -183,25 +199,33 @@ class ARModel(model.TimeSeriesModel): if self.input_window_size: values.get_shape().assert_is_compatible_with( [None, self.input_window_size, self.num_features]) + if exogenous_regressors is not None: + exogenous_regressors.get_shape().assert_is_compatible_with( + [None, self.window_size, self.exogenous_size]) # Create input features. + activation_components = [] if self._periods: _, time_features = self._compute_time_features(times) activation_size = self.window_size * self._buckets * len(self._periods) - activation = array_ops.reshape(time_features, [-1, activation_size]) + activation_components.append( + array_ops.reshape(time_features, [-1, activation_size])) else: activation_size = 0 - activation = None - if self.input_window_size: inp = array_ops.slice(values, [0, 0, 0], [-1, self.input_window_size, -1]) inp_size = self.input_window_size * self.num_features inp = array_ops.reshape(inp, [-1, inp_size]) - if activation is not None: - activation = array_ops.concat([inp, activation], 1) - else: - activation = inp + activation_components.append(inp) activation_size += inp_size + if self.exogenous_size: + exogenous_size = self.window_size * self.exogenous_size + activation_size += exogenous_size + exogenous_flattened = array_ops.reshape( + exogenous_regressors, [-1, exogenous_size]) + activation_components.append(exogenous_flattened) assert activation_size + assert activation_components + activation = array_ops.concat(activation_components, axis=1) activations.append((activation, activation_size)) # Create hidden layers. 
activations += self._create_hidden_stack(activation, activation_size) @@ -228,6 +252,19 @@ class ARModel(model.TimeSeriesModel): math_ops.reduce_prod(array_ops.shape(targets)), loss_op.dtype) return loss_op + def _process_exogenous_features(self, times, features): + embedded = super(ARModel, self)._process_exogenous_features( + times=times, features=features) + if embedded is None: + assert self.exogenous_size == 0 + # No embeddings. Return a zero-size [batch, times, 0] array so we don't + # have to special case it downstream. + return array_ops.zeros( + array_ops.concat([array_ops.shape(times), constant_op.constant([0])], + axis=0)) + else: + return embedded + # TODO(allenl, agarwal): Consider better ways of warm-starting predictions. def predict(self, features): """Computes predictions multiple steps into the future. @@ -243,6 +280,7 @@ class ARModel(model.TimeSeriesModel): segment of the time series before `TIMES`. This data is used to start of the autoregressive computation. This should have data for at least self.input_window_size timesteps. + And any exogenous features, with shapes prefixed by shape of `TIMES`. Returns: A dictionary with keys, "mean", "covariance". 
The values are Tensors of shape [batch_size, predict window size, @@ -250,25 +288,39 @@ class ARModel(model.TimeSeriesModel): """ predict_times = math_ops.cast( ops.convert_to_tensor(features[PredictionFeatures.TIMES]), dtypes.int32) + exogenous_regressors = self._process_exogenous_features( + times=predict_times, + features={key: value for key, value in features.items() + if key not in [TrainEvalFeatures.TIMES, + TrainEvalFeatures.VALUES, + PredictionFeatures.STATE_TUPLE]}) + with ops.control_dependencies( + [check_ops.assert_equal(array_ops.shape(predict_times)[1], + array_ops.shape(exogenous_regressors)[1])]): + exogenous_regressors = array_ops.identity(exogenous_regressors) batch_size = array_ops.shape(predict_times)[0] num_predict_values = array_ops.shape(predict_times)[1] prediction_iterations = ((num_predict_values + self.output_window_size - 1) // self.output_window_size) - # Pad predict_times so as to have exact multiple of self.output_window_size - # values per example. + # Pad predict_times and exogenous regressors so as to have exact multiple of + # self.output_window_size values per example. 
padding_size = (prediction_iterations * self.output_window_size - num_predict_values) - padding = array_ops.zeros([batch_size, padding_size], predict_times.dtype) - predict_times = control_flow_ops.cond( - padding_size > 0, lambda: array_ops.concat([predict_times, padding], 1), - lambda: predict_times) + predict_times = array_ops.pad( + predict_times, [[0, 0], [0, padding_size]]) + exogenous_regressors = array_ops.pad( + exogenous_regressors, [[0, 0], [0, padding_size], [0, 0]]) state = features[PredictionFeatures.STATE_TUPLE] - (state_times, state_values) = state + (state_times, state_values, state_exogenous_regressors) = state state_times = math_ops.cast( ops.convert_to_tensor(state_times), dtypes.int32) state_values = ops.convert_to_tensor(state_values, dtype=self.dtype) + state_exogenous_regressors = ops.convert_to_tensor( + state_exogenous_regressors, dtype=self.dtype) initial_input_times = predict_times[:, :self.output_window_size] + initial_input_exogenous_regressors = ( + exogenous_regressors[:, :self.output_window_size, :]) if self.input_window_size > 0: initial_input_times = array_ops.concat( [state_times[:, -self.input_window_size:], initial_input_times], 1) @@ -279,6 +331,11 @@ class ARModel(model.TimeSeriesModel): check_ops.assert_equal(values_size, times_size) ]): initial_input_values = state_values[:, -self.input_window_size:, :] + initial_input_exogenous_regressors = array_ops.concat( + [state_exogenous_regressors[:, -self.input_window_size:, :], + initial_input_exogenous_regressors[ + :, :self.output_window_size, :]], + axis=1) else: initial_input_values = 0 @@ -288,9 +345,10 @@ class ARModel(model.TimeSeriesModel): return math_ops.less(iteration_number, prediction_iterations) def _while_body(iteration_number, input_times, input_values, - mean_ta, covariance_ta): + input_exogenous_regressors, mean_ta, covariance_ta): """Predict self.output_window_size values.""" - prediction_ops = self.prediction_ops(input_times, input_values) + prediction_ops = 
self.prediction_ops( + input_times, input_values, input_exogenous_regressors) predicted_mean = prediction_ops["mean"] predicted_covariance = prediction_ops["covariance"] offset = self.output_window_size * gen_math_ops.minimum( @@ -299,20 +357,33 @@ class ARModel(model.TimeSeriesModel): if self.output_window_size < self.input_window_size: new_input_values = array_ops.concat( [input_values[:, self.output_window_size:, :], predicted_mean], 1) + new_input_exogenous_regressors = array_ops.concat( + [input_exogenous_regressors[:, -self.input_window_size:, :], + exogenous_regressors[ + :, offset:offset + self.output_window_size, :]], + axis=1) new_input_times = array_ops.concat([ - input_times[:, self.output_window_size:], + input_times[:, -self.input_window_size:], predict_times[:, offset:offset + self.output_window_size] ], 1) else: new_input_values = predicted_mean[:, -self.input_window_size:, :] + new_input_exogenous_regressors = exogenous_regressors[ + :, + offset - self.input_window_size:offset + self.output_window_size, + :] new_input_times = predict_times[ :, offset - self.input_window_size:offset + self.output_window_size] else: new_input_values = input_values + new_input_exogenous_regressors = exogenous_regressors[ + :, offset:offset + self.output_window_size, :] new_input_times = predict_times[:, offset:offset + self.output_window_size] new_input_times.set_shape(initial_input_times.get_shape()) + new_input_exogenous_regressors.set_shape( + initial_input_exogenous_regressors.get_shape()) new_mean_ta = mean_ta.write(iteration_number, predicted_mean) if isinstance(covariance_ta, tensor_array_ops.TensorArray): new_covariance_ta = covariance_ta.write(iteration_number, @@ -322,6 +393,7 @@ class ARModel(model.TimeSeriesModel): return (iteration_number + 1, new_input_times, new_input_values, + new_input_exogenous_regressors, new_mean_ta, new_covariance_ta) @@ -332,9 +404,13 @@ class ARModel(model.TimeSeriesModel): if self.loss != ARModel.SQUARED_LOSS else 0.) 
mean_ta_init = tensor_array_ops.TensorArray( dtype=self.dtype, size=prediction_iterations) - _, _, _, mean_ta, covariance_ta = control_flow_ops.while_loop( + _, _, _, _, mean_ta, covariance_ta = control_flow_ops.while_loop( _while_condition, _while_body, [ - 0, initial_input_times, initial_input_values, mean_ta_init, + 0, + initial_input_times, + initial_input_values, + initial_input_exogenous_regressors, + mean_ta_init, covariance_ta_init ]) @@ -366,11 +442,11 @@ class ARModel(model.TimeSeriesModel): return {"mean": predicted_mean, "covariance": predicted_covariance} - def _process_window(self, features, mode): + def _process_window(self, features, mode, exogenous_regressors): """Compute model outputs on a single window of data.""" - # TODO(agarwal): Use exogenous features times = math_ops.cast(features[TrainEvalFeatures.TIMES], dtypes.int64) values = math_ops.cast(features[TrainEvalFeatures.VALUES], dtype=self.dtype) + exogenous_regressors = math_ops.cast(exogenous_regressors, dtype=self.dtype) original_values = values # Extra shape checking for the window size (above that in @@ -395,7 +471,8 @@ class ARModel(model.TimeSeriesModel): input_values = values[:, :self.input_window_size, :] else: input_values = None - prediction_ops = self.prediction_ops(times, input_values) + prediction_ops = self.prediction_ops( + times, input_values, exogenous_regressors) prediction = prediction_ops["mean"] covariance = prediction_ops["covariance"] targets = array_ops.slice(values, [0, self.input_window_size, 0], @@ -419,7 +496,8 @@ class ARModel(model.TimeSeriesModel): return model.ModelOutputs( loss=loss, end_state=(times[:, -self.input_window_size:], - values[:, -self.input_window_size:, :]), + values[:, -self.input_window_size:, :], + exogenous_regressors[:, -self.input_window_size:, :]), predictions={"mean": prediction, "covariance": covariance, "observed": original_values[:, -self.output_window_size:]}, prediction_times=times[:, -self.output_window_size:]) @@ -454,17 +532,24 
@@ class ARModel(model.TimeSeriesModel): """ features = {feature_name: ops.convert_to_tensor(feature_value) for feature_name, feature_value in features.items()} + times = features[TrainEvalFeatures.TIMES] + exogenous_regressors = self._process_exogenous_features( + times=times, + features={key: value for key, value in features.items() + if key not in [TrainEvalFeatures.TIMES, + TrainEvalFeatures.VALUES, + PredictionFeatures.STATE_TUPLE]}) if mode == estimator_lib.ModeKeys.TRAIN: # For training, we require the window size to be self.window_size as # iterating sequentially on larger windows could introduce a bias. - return self._process_window(features, mode=mode) + return self._process_window( + features, mode=mode, exogenous_regressors=exogenous_regressors) elif mode == estimator_lib.ModeKeys.EVAL: # For evaluation, we allow the user to pass in a larger window, in which # case we try to cover as much of the window as possible without # overlap. Quantitative evaluation is more efficient/correct with fixed # windows matching self.window_size (as with training), but this looping # allows easy plotting of "in-sample" predictions. 
- times = features[TrainEvalFeatures.TIMES] times.get_shape().assert_has_rank(2) static_window_size = times.get_shape()[1].value if (static_window_size is not None @@ -500,7 +585,9 @@ class ARModel(model.TimeSeriesModel): feature_name: feature_value[:, base_offset:base_offset + self.window_size] for feature_name, feature_value in features.items()}, - mode=mode) + mode=mode, + exogenous_regressors=exogenous_regressors[ + :, base_offset:base_offset + self.window_size]) # This code needs to be updated if new predictions are added in # self._process_window assert len(model_outputs.predictions) == 3 @@ -525,7 +612,9 @@ class ARModel(model.TimeSeriesModel): batch_size = array_ops.shape(times)[0] prediction_shape = [batch_size, self.output_window_size * num_iterations, self.num_features] - previous_state_times, previous_state_values = state + (previous_state_times, + previous_state_values, + previous_state_exogenous_regressors) = state # Make sure returned state always has windows of self.input_window_size, # even if we were passed fewer than self.input_window_size points this # time. @@ -540,14 +629,24 @@ class ARModel(model.TimeSeriesModel): self._scale_data(values)], axis=1)[:, -self.input_window_size:, :] new_state_values.set_shape((None, self.input_window_size, self.num_features)) + new_exogenous_regressors = array_ops.concat( + [previous_state_exogenous_regressors, + exogenous_regressors], axis=1)[:, -self.input_window_size:, :] + new_exogenous_regressors.set_shape( + (None, + self.input_window_size, + self.exogenous_size)) else: # There is no state to keep, and the strided slices above do not handle # input_window_size=0. 
new_state_times = previous_state_times new_state_values = previous_state_values + new_exogenous_regressors = previous_state_exogenous_regressors return model.ModelOutputs( loss=math_ops.reduce_mean(loss_ta.stack(), axis=0), - end_state=(new_state_times, new_state_values), + end_state=(new_state_times, + new_state_values, + new_exogenous_regressors), predictions={ "mean": array_ops.reshape( array_ops.transpose(mean_ta.stack(), [1, 0, 2, 3]), @@ -604,7 +703,8 @@ class AnomalyMixtureARModel(ARModel): num_features, anomaly_distribution=GAUSSIAN_ANOMALY, num_time_buckets=10, - hidden_layer_sizes=None): + hidden_layer_sizes=None, + exogenous_feature_columns=None): assert (anomaly_prior_probability < 1.0 and anomaly_prior_probability > 0.0) self._anomaly_prior_probability = anomaly_prior_probability @@ -619,7 +719,8 @@ class AnomalyMixtureARModel(ARModel): input_window_size=input_window_size, output_window_size=output_window_size, loss=ARModel.NORMAL_LIKELIHOOD_LOSS, - hidden_layer_sizes=hidden_layer_sizes) + hidden_layer_sizes=hidden_layer_sizes, + exogenous_feature_columns=exogenous_feature_columns) def _create_anomaly_ops(self, times, values, prediction_ops_dict): anomaly_log_param = variable_scope.get_variable( @@ -631,9 +732,9 @@ class AnomalyMixtureARModel(ARModel): # distribution. 
prediction_ops_dict["anomaly_params"] = gen_math_ops.exp(anomaly_log_param) - def prediction_ops(self, times, values): + def prediction_ops(self, times, values, exogenous_regressors): prediction_ops_dict = super(AnomalyMixtureARModel, self).prediction_ops( - times, values) + times, values, exogenous_regressors) self._create_anomaly_ops(times, values, prediction_ops_dict) return prediction_ops_dict diff --git a/tensorflow/contrib/timeseries/python/timeseries/ar_model_test.py b/tensorflow/contrib/timeseries/python/timeseries/ar_model_test.py index 1e1ca4e77f..d078ac8d46 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/ar_model_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/ar_model_test.py @@ -155,12 +155,15 @@ class ARModelTest(test.TestCase): state_times = np.expand_dims(train_data_times[:input_window_size], 0) state_values = np.expand_dims( train_data_values[:input_window_size, :], 0) + state_exogenous = state_times[:, :, None][:, :, :0] def prediction_input_fn(): return ({ PredictionFeatures.TIMES: training.limit_epochs( predict_times, num_epochs=1), - PredictionFeatures.STATE_TUPLE: (state_times, state_values) + PredictionFeatures.STATE_TUPLE: (state_times, + state_values, + state_exogenous) }, {}) (predictions,) = tuple(estimator.predict(input_fn=prediction_input_fn)) predicted_mean = predictions["mean"][:, 0] @@ -246,7 +249,8 @@ class ARModelTest(test.TestCase): with session.Session(): predicted_values = model.predict({ PredictionFeatures.TIMES: [[4, 6, 10]], - PredictionFeatures.STATE_TUPLE: ([[1, 2]], [[[1.], [2.]]]) + PredictionFeatures.STATE_TUPLE: ( + [[1, 2]], [[[1.], [2.]]], [[[], []]]) }) variables.global_variables_initializer().run() self.assertAllEqual(predicted_values["mean"].eval().shape, diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators.py b/tensorflow/contrib/timeseries/python/timeseries/estimators.py index 886e1846e2..f4608ca2d1 100644 --- 
a/tensorflow/contrib/timeseries/python/timeseries/estimators.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators.py @@ -190,7 +190,7 @@ class ARRegressor(TimeSeriesRegressor): def __init__( self, periodicities, input_window_size, output_window_size, - num_features, num_time_buckets=10, + num_features, exogenous_feature_columns=None, num_time_buckets=10, loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS, hidden_layer_sizes=None, anomaly_prior_probability=None, anomaly_distribution=None, optimizer=None, model_dir=None, config=None): @@ -205,7 +205,12 @@ class ARRegressor(TimeSeriesRegressor): output_window_size: Number of future time steps to predict. Note that setting it to > 1 empirically seems to give a better fit. num_features: The dimensionality of the time series (one for univariate, - more than one for multivariate). + more than one for multivariate). + exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not part + of the series to be predicted. Passed to + `tf.feature_column.input_layer`. num_time_buckets: Number of buckets into which to divide (time % periodicity) for generating time based features. loss: Loss function to use for training. 
Currently supported values are @@ -241,6 +246,7 @@ class ARRegressor(TimeSeriesRegressor): anomaly_distribution = ar_model.AnomalyMixtureARModel.GAUSSIAN_ANOMALY model = ar_model.ARModel( periodicities=periodicities, num_features=num_features, + exogenous_feature_columns=exogenous_feature_columns, num_time_buckets=num_time_buckets, input_window_size=input_window_size, output_window_size=output_window_size, loss=loss, @@ -255,6 +261,7 @@ class ARRegressor(TimeSeriesRegressor): input_window_size=input_window_size, output_window_size=output_window_size, num_features=num_features, + exogenous_feature_columns=exogenous_feature_columns, num_time_buckets=num_time_buckets, hidden_layer_sizes=hidden_layer_sizes, anomaly_prior_probability=anomaly_prior_probability, diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py b/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py index 9f161c1695..eebee053f8 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py @@ -29,6 +29,7 @@ from tensorflow.contrib.timeseries.python.timeseries import saved_model_utils from tensorflow.python.client import session from tensorflow.python.estimator import estimator_lib +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.platform import test @@ -48,12 +49,17 @@ class TimeSeriesRegressorTest(test.TestCase): def _fit_restore_fit_test_template(self, estimator_fn, dtype): """Tests restoring previously fit models.""" model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) - first_estimator = estimator_fn(model_dir) + exogenous_feature_columns = ( + feature_column.numeric_column("exogenous"), + ) + first_estimator = estimator_fn(model_dir, exogenous_feature_columns) times = numpy.arange(20, dtype=numpy.int64) values = numpy.arange(20, 
dtype=dtype.as_numpy_dtype) + exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype) features = { feature_keys.TrainEvalFeatures.TIMES: times, - feature_keys.TrainEvalFeatures.VALUES: values + feature_keys.TrainEvalFeatures.VALUES: values, + "exogenous": exogenous } train_input_fn = input_pipeline.RandomWindowInputFn( input_pipeline.NumpyReader(features), shuffle_seed=2, num_threads=1, @@ -68,14 +74,19 @@ class TimeSeriesRegressorTest(test.TestCase): first_loss_after_fit = first_estimator.evaluate( input_fn=eval_input_fn, steps=1)["loss"] self.assertLess(first_loss_after_fit, first_loss_before_fit) - second_estimator = estimator_fn(model_dir) + second_estimator = estimator_fn(model_dir, exogenous_feature_columns) second_estimator.train(input_fn=train_input_fn, steps=2) whole_dataset_input_fn = input_pipeline.WholeDatasetInputFn( input_pipeline.NumpyReader(features)) whole_dataset_evaluation = second_estimator.evaluate( input_fn=whole_dataset_input_fn, steps=1) + exogenous_values_ten_steps = { + "exogenous": numpy.arange( + 10, dtype=dtype.as_numpy_dtype)[None, :, None] + } predict_input_fn = input_pipeline.predict_continuation_input_fn( evaluation=whole_dataset_evaluation, + exogenous_features=exogenous_values_ten_steps, steps=10) # Also tests that limit_epochs in predict_continuation_input_fn prevents # infinite iteration @@ -92,6 +103,7 @@ class TimeSeriesRegressorTest(test.TestCase): saved_prediction = saved_model_utils.predict_continuation( continue_from=whole_dataset_evaluation, steps=10, + exogenous_features=exogenous_values_ten_steps, signatures=signatures, session=sess) # Saved model predictions should be the same as Estimator predictions @@ -104,7 +116,8 @@ class TimeSeriesRegressorTest(test.TestCase): continue_from=whole_dataset_evaluation, features={ feature_keys.FilteringFeatures.TIMES: times[None, -1] + 2, - feature_keys.FilteringFeatures.VALUES: values[None, -1] + 2. 
+ feature_keys.FilteringFeatures.VALUES: values[None, -1] + 2., + "exogenous": values[None, -1, None] + 12. }, signatures=signatures, session=sess) @@ -112,6 +125,10 @@ class TimeSeriesRegressorTest(test.TestCase): second_saved_prediction = saved_model_utils.predict_continuation( continue_from=first_filtering, steps=1, + exogenous_features={ + "exogenous": numpy.arange( + 1, dtype=dtype.as_numpy_dtype)[None, :, None] + }, signatures=signatures, session=sess) self.assertEqual( @@ -122,7 +139,8 @@ class TimeSeriesRegressorTest(test.TestCase): continue_from=first_filtering, features={ feature_keys.FilteringFeatures.TIMES: times[-1] + 3, - feature_keys.FilteringFeatures.VALUES: values[-1] + 3. + feature_keys.FilteringFeatures.VALUES: values[-1] + 3., + "exogenous": values[-1, None] + 13. }, signatures=signatures, session=sess) @@ -131,7 +149,8 @@ class TimeSeriesRegressorTest(test.TestCase): six.assertCountEqual( self, [feature_keys.FilteringFeatures.TIMES, - feature_keys.FilteringFeatures.VALUES], + feature_keys.FilteringFeatures.VALUES, + "exogenous"], signatures.signature_def[ feature_keys.SavedModelLabels.COLD_START_FILTER].inputs.keys()) batch_numpy_times = numpy.tile( @@ -142,7 +161,8 @@ class TimeSeriesRegressorTest(test.TestCase): session=sess, features={ feature_keys.FilteringFeatures.TIMES: batch_numpy_times, - feature_keys.FilteringFeatures.VALUES: batch_numpy_values + feature_keys.FilteringFeatures.VALUES: batch_numpy_values, + "exogenous": 10. 
+ batch_numpy_values } ) predict_times = numpy.tile( @@ -150,26 +170,32 @@ class TimeSeriesRegressorTest(test.TestCase): predictions = saved_model_utils.predict_continuation( continue_from=state, times=predict_times, + exogenous_features={ + "exogenous": numpy.tile(numpy.arange( + 15, dtype=dtype.as_numpy_dtype), (10,))[None, :, None] + }, signatures=signatures, session=sess) self.assertAllEqual([10, 15, 1], predictions["mean"].shape) def test_fit_restore_fit_ar_regressor(self): - def _estimator_fn(model_dir): + def _estimator_fn(model_dir, exogenous_feature_columns): return estimators.ARRegressor( periodicities=10, input_window_size=10, output_window_size=6, num_features=1, model_dir=model_dir, config=_SeedRunConfig(), # This test is flaky with normal likelihood loss (could add more # training iterations instead). - loss=ar_model.ARModel.SQUARED_LOSS) + loss=ar_model.ARModel.SQUARED_LOSS, + exogenous_feature_columns=exogenous_feature_columns) self._fit_restore_fit_test_template(_estimator_fn, dtype=dtypes.float32) def test_fit_restore_fit_structural_ensemble_regressor(self): dtype = dtypes.float32 - def _estimator_fn(model_dir): + def _estimator_fn(model_dir, exogenous_feature_columns): return estimators.StructuralEnsembleRegressor( num_features=1, periodicities=10, model_dir=model_dir, dtype=dtype, - config=_SeedRunConfig()) + config=_SeedRunConfig(), + exogenous_feature_columns=exogenous_feature_columns) self._fit_restore_fit_test_template(_estimator_fn, dtype=dtype) -- GitLab From a36e6edab33c7a5bef2f911d4d7bb88ffc8c7de6 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Mon, 23 Apr 2018 16:51:59 -0700 Subject: [PATCH 179/434] Handle missing params for a few ops in Toco using default values. 
PiperOrigin-RevId: 194007329 --- .../contrib/lite/toco/import_tensorflow.cc | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 155d890c9f..2ed05cb372 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1093,8 +1093,10 @@ void ConvertMatMulOperator(const NodeDef& node, // Transpose flags should be easy to support, but we don't have a // GraphDef with them to test on at the moment. - CHECK_EQ(GetBoolAttr(node, "transpose_a"), false); - CHECK_EQ(GetBoolAttr(node, "transpose_b"), false); + CHECK_EQ(HasAttr(node, "transpose_a") && GetBoolAttr(node, "transpose_a"), + false); + CHECK_EQ(HasAttr(node, "transpose_b") && GetBoolAttr(node, "transpose_b"), + false); CHECK(!HasAttr(node, "adjoint_a") || (GetBoolAttr(node, "adjoint_a") == false)); CHECK(!HasAttr(node, "adjoint_b") || @@ -1300,11 +1302,17 @@ void ConvertStridedSliceOperator(const NodeDef& node, } op->outputs.push_back(node.name()); - op->begin_mask = GetIntAttr(node, "begin_mask"); - op->ellipsis_mask = GetIntAttr(node, "ellipsis_mask"); - op->end_mask = GetIntAttr(node, "end_mask"); - op->new_axis_mask = GetIntAttr(node, "new_axis_mask"); - op->shrink_axis_mask = GetIntAttr(node, "shrink_axis_mask"); + op->begin_mask = + HasAttr(node, "begin_mask") ? GetIntAttr(node, "begin_mask") : 0; + op->ellipsis_mask = + HasAttr(node, "ellipsis_mask") ? GetIntAttr(node, "ellipsis_mask") : 0; + op->end_mask = HasAttr(node, "end_mask") ? GetIntAttr(node, "end_mask") : 0; + op->new_axis_mask = + HasAttr(node, "new_axis_mask") ? GetIntAttr(node, "new_axis_mask") : 0; + op->shrink_axis_mask = HasAttr(node, "shrink_axis_mask") + ? 
GetIntAttr(node, "shrink_axis_mask") + : 0; + model->operators.emplace_back(op); } @@ -1394,8 +1402,11 @@ void ConvertArgMaxOperator(const NodeDef& node, Model* model) { CHECK_EQ(node.op(), "ArgMax"); CheckInputsCount(node, tf_import_flags, 2); - const auto axis_data_type = GetDataTypeAttr(node, "Tidx"); - const auto output_type = GetDataTypeAttr(node, "output_type"); + const auto axis_data_type = + HasAttr(node, "Tidx") ? GetDataTypeAttr(node, "Tidx") : DT_INT32; + const auto output_type = HasAttr(node, "output_type") + ? GetDataTypeAttr(node, "output_type") + : DT_INT64; CHECK(axis_data_type == DT_INT64 || axis_data_type == DT_INT32); CHECK(output_type == DT_INT64 || output_type == DT_INT32); auto* op = new ArgMaxOperator; @@ -1772,7 +1783,7 @@ void ConvertStackOperator(const NodeDef& node, op->inputs.push_back(node.input(i)); } // Both "Stack" and "Pack" have the "axis" attribute. - op->axis = GetIntAttr(node, "axis"); + op->axis = HasAttr(node, "axis") ? GetIntAttr(node, "axis") : 0; op->outputs.push_back(node.name()); model->operators.emplace_back(op); } -- GitLab From 771f7b46d631fa510658685d1b84ffbb22ffcd55 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Mon, 23 Apr 2018 17:10:05 -0700 Subject: [PATCH 180/434] Improve TOCO SavedModel support. 
PiperOrigin-RevId: 194009891 --- tensorflow/contrib/lite/python/BUILD | 45 +- tensorflow/contrib/lite/python/convert.py | 187 +++++++++ .../lite/python/convert_saved_model.py | 387 ++++++++++++------ .../lite/python/convert_saved_model_test.py | 172 ++++++-- .../convert_saved_model_to_frozen_graph.py | 106 +++++ .../python/{lite_test.py => convert_test.py} | 41 +- tensorflow/contrib/lite/python/lite.py | 204 +-------- .../contrib/lite/python/lite_constants.py | 53 +++ 8 files changed, 828 insertions(+), 367 deletions(-) create mode 100644 tensorflow/contrib/lite/python/convert.py create mode 100644 tensorflow/contrib/lite/python/convert_saved_model_to_frozen_graph.py rename tensorflow/contrib/lite/python/{lite_test.py => convert_test.py} (82%) create mode 100644 tensorflow/contrib/lite/python/lite_constants.py diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index 926896d609..e6dcc7aa09 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -39,16 +39,35 @@ py_test( py_library( name = "lite", srcs = ["lite.py"], - # data = [ - # "//tensorflow/contrib/lite/toco/python:toco_from_protos", - # ], srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ + ":convert", + ":convert_saved_model", ":op_hint", + ], +) + +py_library( + name = "lite_constants", + srcs = ["lite_constants.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/lite/toco:toco_flags_proto_py", + ], +) + +py_library( + name = "convert", + srcs = ["convert.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + ":lite_constants", "//tensorflow/contrib/lite/toco:model_flags_proto_py", "//tensorflow/contrib/lite/toco:toco_flags_proto_py", "//tensorflow/contrib/lite/toco/python:tensorflow_wrap_toco", + "//tensorflow/contrib/lite/toco/python:toco_from_protos", "//tensorflow/python:platform", ], ) @@ -66,15 +85,15 @@ py_library( ) py_test( - name = "lite_test", - 
srcs = ["lite_test.py"], + name = "convert_test", + srcs = ["convert_test.py"], srcs_version = "PY2AND3", tags = [ "no-internal-py3", "no_oss", ], deps = [ - ":lite", + ":convert", ":op_hint", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -84,13 +103,14 @@ py_test( ], ) -py_binary( +py_library( name = "convert_saved_model", srcs = ["convert_saved_model.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ - ":lite", + ":convert", + ":lite_constants", "//tensorflow/contrib/saved_model:saved_model_py", "//tensorflow/python:graph_util", "//tensorflow/python/tools:freeze_graph_lib", @@ -130,6 +150,15 @@ py_test( ], ) +py_binary( + name = "convert_saved_model_to_frozen_graph", + srcs = ["convert_saved_model_to_frozen_graph.py"], + srcs_version = "PY2AND3", + deps = [ + ":convert_saved_model", + ], +) + # Transitive dependencies of this target will be included in the pip package. py_library( name = "tf_lite_py_pip", diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py new file mode 100644 index 0000000000..c4200c879b --- /dev/null +++ b/tensorflow/contrib/lite/python/convert.py @@ -0,0 +1,187 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Converts a frozen graph into a TFLite FlatBuffer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os as _os +import subprocess as _subprocess +import tempfile as _tempfile + +from tensorflow.contrib.lite.python import lite_constants +from tensorflow.contrib.lite.toco import model_flags_pb2 as _model_flags_pb2 +from tensorflow.contrib.lite.toco import toco_flags_pb2 as _toco_flags_pb2 +from tensorflow.python.framework import dtypes as _dtypes +from tensorflow.python.platform import resource_loader as _resource_loader +from tensorflow.python.util.lazy_loader import LazyLoader + + +# Lazy load since some of the performance benchmark skylark rules +# break dependencies. +_toco_python = LazyLoader( + "tensorflow_wrap_toco", globals(), + "tensorflow.contrib.lite.toco.python." + "tensorflow_wrap_toco") +del LazyLoader + +# Find the toco_from_protos binary using the resource loader if using from +# bazel, otherwise we are in a pip where console_scripts already has +# the toco_from_protos tool. +if lite_constants.EXPERIMENTAL_USE_TOCO_API_DIRECTLY: + _toco_from_proto_bin = "" +else: + _toco_from_proto_bin = _resource_loader.get_path_to_datafile( + "../toco/python/toco_from_protos") + +if _toco_from_proto_bin and not _os.path.exists(_toco_from_proto_bin): + _toco_from_proto_bin = "toco_from_protos" + + +def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): + """Convert `input_data_str` according to model and toco parameters. + + Unless you know what you are doing consider using + the more friendly @{tf.contrib.lite.toco_convert}}. + + Args: + model_flags_str: Serialized proto describing model properties, see + `toco/model_flags.proto`. + toco_flags_str: Serialized proto describing conversion properties, see + `toco/toco_flags.proto`. + input_data_str: Input data in serialized form (e.g. 
a graphdef is common) + Returns: + Converted model in serialized form (e.g. a TFLITE model is common). + Raises: + RuntimeError: When conversion fails, an exception is raised with the error + message embedded. + """ + # TODO(aselle): When toco does not use fatal errors for failure, we can + # switch this on. + if not _toco_from_proto_bin: + return _toco_python.TocoConvert( + model_flags_str, toco_flags_str, input_data_str) + + with _tempfile.NamedTemporaryFile() as fp_toco, \ + _tempfile.NamedTemporaryFile() as fp_model, \ + _tempfile.NamedTemporaryFile() as fp_input, \ + _tempfile.NamedTemporaryFile() as fp_output: + fp_model.write(model_flags_str) + fp_toco.write(toco_flags_str) + fp_input.write(input_data_str) + fp_model.flush() + fp_toco.flush() + fp_input.flush() + + cmd = [ + _toco_from_proto_bin, fp_model.name, fp_toco.name, fp_input.name, + fp_output.name + ] + cmdline = " ".join(cmd) + proc = _subprocess.Popen( + cmdline, + shell=True, + stdout=_subprocess.PIPE, + stderr=_subprocess.STDOUT, + close_fds=True) + stdout, stderr = proc.communicate() + exitcode = proc.returncode + if exitcode == 0: + stuff = fp_output.read() + return stuff + else: + raise RuntimeError("TOCO failed see console for info.\n%s\n%s\n" % + (stdout, stderr)) + + +def tensor_name(x): + return x.name.split(":")[0] + + +def toco_convert(input_data, + input_tensors, + output_tensors, + inference_type=lite_constants.FLOAT, + input_format=lite_constants.TENSORFLOW_GRAPHDEF, + output_format=lite_constants.TFLITE, + quantized_input_stats=None, + drop_control_dependency=True): + """Convert a model using TOCO from `input_format` to `output_format`. + + Typically this is to convert from TensorFlow GraphDef to TFLite, in which + case the default `input_format` and `output_format` are sufficient. + + Args: + input_data: Input data (i.e. often `sess.graph_def`). + input_tensors: List of input tensors. Type and shape are computed using + `foo.get_shape()` and `foo.dtype`. 
+ output_tensors: List of output tensors (only .name is used from this). + inference_type: Currently must be `{FLOAT, QUANTIZED_UINT8}`. + input_format: Type of data to read (currently must be TENSORFLOW_GRAPHDEF). + output_format: Type of data to write (currently must be TFLITE or + GRAPHVIZ_DOT) + quantized_input_stats: For each member of input_tensors the mean and + std deviation of training data. Only needed if `inference_type` is + `QUANTIZED_UINT8`. + drop_control_dependency: Drops control dependencies silently. This is due + to tf lite not supporting control dependencies. + + Returns: + The converted data. For example if tflite was the destination, then + this will be a tflite flatbuffer in a bytes array. + + Raises: + ValueError: If the input tensor type is unknown + RuntimeError: If TOCO fails to convert (in which case the runtime error's + error text will contain the TOCO error log) + """ + toco = _toco_flags_pb2.TocoFlags() + toco.input_format = input_format + toco.output_format = output_format + toco.drop_control_dependency = drop_control_dependency + model = _model_flags_pb2.ModelFlags() + toco.inference_type = inference_type + for idx, input_tensor in enumerate(input_tensors): + if input_tensor.dtype == _dtypes.float32: + tflite_input_type = lite_constants.FLOAT + elif input_tensor.dtype == _dtypes.int32: + tflite_input_type = lite_constants.INT32 + elif input_tensor.dtype == _dtypes.int64: + tflite_input_type = lite_constants.INT64 + # TODO(aselle): Insert strings when they are available + else: + raise ValueError("Tensors %s not known type %r" % (input_tensor.name, + input_tensor.dtype)) + + input_array = model.input_arrays.add() + + if inference_type == lite_constants.QUANTIZED_UINT8: + if tflite_input_type == lite_constants.FLOAT: + tflite_input_type = lite_constants.QUANTIZED_UINT8 + input_array.mean_value, input_array.std_value = quantized_input_stats[idx] + + input_array.name = tensor_name(input_tensor) + input_array.shape.dims.extend(map(int, 
input_tensor.get_shape())) + + for output_tensor in output_tensors: + model.output_arrays.append(tensor_name(output_tensor)) + + # TODO(aselle): Consider handling the case of allowing quantized + # inputs to be converted to float (via the toco.inference_input_type field). + data = toco_convert_protos(model.SerializeToString(), + toco.SerializeToString(), + input_data.SerializeToString()) + return data diff --git a/tensorflow/contrib/lite/python/convert_saved_model.py b/tensorflow/contrib/lite/python/convert_saved_model.py index a2b5ef488e..a7eddf3408 100644 --- a/tensorflow/contrib/lite/python/convert_saved_model.py +++ b/tensorflow/contrib/lite/python/convert_saved_model.py @@ -12,52 +12,43 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -r"""TensorFlow Lite flatbuffer generation from saved_models. +"""Functions to convert SavedModel to frozen GraphDefs.""" -Example: - -bazel run third_party/tensorflow/contrib/lite/python:convert_saved_model -- \ - --saved_model_dir=/tmp/test_saved_model/1519865537 \ - --output_tflite=/tmp/test.lite - -""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.lite.python import lite +from tensorflow.contrib.lite.python import convert +from tensorflow.contrib.lite.python import lite_constants +from tensorflow.contrib.lite.toco import model_flags_pb2 from tensorflow.contrib.saved_model.python.saved_model import reader from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils from tensorflow.core.framework import types_pb2 from tensorflow.python.client import session from tensorflow.python.framework import graph_util as tf_graph_util from tensorflow.python.framework import ops -from tensorflow.python.platform import app -from tensorflow.python.platform import flags from 
tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import loader from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import tag_constants -flags.DEFINE_string("saved_model_dir", "", "Saved model directory to convert.") -flags.DEFINE_string("output_tflite", None, "File path to write flatbuffer.") -flags.DEFINE_string("output_arrays", None, - "List of output tensor names, the default value is None, " - "which means the conversion will keep all outputs.") -flags.DEFINE_integer("batch_size", 1, - "If input tensor shape has None at first dimension, " - "e.g. (None,224,224,3), replace None with batch_size.") -flags.DEFINE_string("tag_set", tag_constants.SERVING, - "Group of tag(s) of the MetaGraphDef in the saved_model, " - "in string format, separated by ','. For tag-set contains " - "multiple tags, all tags must be passed in.") -flags.DEFINE_string("signature_key", - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, - "This is signature key to extract inputs, outputs.") - - -def log_tensor_details(tensor_info): + +def _write_and_flush_file(file_path, data_str): + """Writes data to file path. + + Args: + file_path: Full path of the file to store data in. + data_str: Data represented as a string. + + Returns: None. + """ + with gfile.Open(file_path, "wb") as data_file: + data_file.write(data_str) + data_file.flush() + + +def _log_tensor_details(tensor_info): """Log tensor details: name, shape, and type.""" for key in tensor_info: val = tensor_info[key] @@ -73,7 +64,7 @@ def log_tensor_details(tensor_info): dtype) -def get_meta_graph_def(saved_model_dir, tag_set): +def _get_meta_graph_def(saved_model_dir, tag_set): """Validate saved_model and extract MetaGraphDef. Args: @@ -103,7 +94,7 @@ def get_meta_graph_def(saved_model_dir, tag_set): "values are '{}'. 
".format(tag_set, tag_sets)) -def get_signature_def(meta_graph, signature_key): +def _get_signature_def(meta_graph, signature_key): """Get the signature def from meta_graph with given signature_key. Args: @@ -130,11 +121,11 @@ def get_signature_def(meta_graph, signature_key): return signature_def -def get_inputs_outputs(signature_def): - """Get inputs and outputs from signature def. +def _get_inputs_outputs(signature_def): + """Get inputs and outputs from SignatureDef. Args: - signature_def: signatuer def in the meta_graph_def for conversion. + signature_def: SignatureDef in the meta_graph_def for conversion. Returns: The inputs and outputs in the graph for conversion. @@ -142,9 +133,9 @@ def get_inputs_outputs(signature_def): inputs_tensor_info = signature_def.inputs outputs_tensor_info = signature_def.outputs logging.info("input tensors info: ") - log_tensor_details(inputs_tensor_info) + _log_tensor_details(inputs_tensor_info) logging.info("output tensors info: ") - log_tensor_details(outputs_tensor_info) + _log_tensor_details(outputs_tensor_info) def gather_names(tensor_info): return [tensor_info[key].name for key in tensor_info] @@ -154,109 +145,277 @@ def get_inputs_outputs(signature_def): return inputs, outputs -def convert(saved_model_dir, - output_tflite=None, - output_arrays=None, - tag_set=None, - signature_key=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, - batch_size=1): - """Convert a saved_model to tflite flatbuffer. +def _get_tensors(graph, signature_def_tensor_names=None, + user_tensor_names=None): + """Gets the tensors associated with the tensor names. + + Either signature_def_tensor_names or user_tensor_names should be provided. If + the user provides tensors, the tensors associated with the user provided + tensor names are provided. Otherwise, the tensors associated with the names in + the SignatureDef are provided. Args: - saved_model_dir: Saved model directory to convert. - output_tflite: File path to write result flatbuffer. 
- output_arrays: List of output tensor names, the default value is None, which - means conversion keeps all output tensors. This is also used to filter - tensors that are from Op currently not supported in tflite, e.g., Argmax). - tag_set: This is the set of tags to get meta_graph_def in saved_model. - signature_key: This is the signature key to extract inputs, outputs. - batch_size: If input tensor shape has None at first dimension, - e.g. (None,224,224,3), replace None with batch_size. + graph: GraphDef representing graph. + signature_def_tensor_names: Tensor names stored in either the inputs or + outputs of a SignatureDef. (default None) + user_tensor_names: Tensor names provided by the user. (default None) Returns: - The converted data. For example if tflite was the destination, then - this will be a tflite flatbuffer in a bytes array. + List of tensors. + + Raises: + ValueError: + signature_def_tensors and user_tensor_names are undefined or empty. + user_tensor_names are not valid. + """ + tensors = [] + if user_tensor_names: + # Get the list of all of the tensors with and without the tensor index. + all_tensor_names = [ + tensor.name for op in graph.get_operations() for tensor in op.outputs + ] + all_tensor_names_only = [name.split(":")[0] for name in all_tensor_names] + + # Sort the tensor names. + user_tensor_names = sorted(user_tensor_names) + + # Get the tensors associated with the tensor names. + tensors = [] + invalid_tensors = [] + for name in user_tensor_names: + if name not in all_tensor_names_only: + invalid_tensors.append(name) + else: + idx = all_tensor_names_only.index(name) + tensors.append(graph.get_tensor_by_name(all_tensor_names[idx])) + + # Throw ValueError if any user input names are not valid tensors. 
+ if invalid_tensors: + raise ValueError("Invalid tensors '{}' were found.".format( + ",".join(invalid_tensors))) + elif signature_def_tensor_names: + tensors = [ + graph.get_tensor_by_name(name) + for name in sorted(signature_def_tensor_names) + ] + else: + # Throw ValueError if signature_def_tensors and user_tensor_names are both + # either undefined or empty. + raise ValueError( + "Specify either signature_def_tensor_names or user_tensor_names") + + return tensors + + +def _freeze_saved_model(saved_model_dir, input_arrays, input_shapes, + output_arrays, tag_set, signature_key, batch_size): + """Converts a SavedModel to a frozen graph. + + Args: + saved_model_dir: SavedModel directory to convert. + input_arrays: List of input tensors to freeze graph with. Uses input arrays + from SignatureDef when none are provided. (default None) + input_shapes: Map of strings representing input tensor names to list of + integers representing input shapes (e.g., {"foo": [1, 16, 16, 3]}). + Automatically determined when input shapes is None (e.g., {"foo": None}). + (default None) + output_arrays: List of output tensors to freeze graph with. Uses output + arrays from SignatureDef when none are provided. (default None) + tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to + analyze. All tags in the tag set must be present. (default "serve") + signature_key: Key identifying SignatureDef containing inputs and outputs. + batch_size: Batch size for the model. Replaces the first dimension of an + input size array if undefined. (default 1) + + Returns: + frozen_graph_def: Frozen GraphDef. + in_tensors: List of input tensors for the graph. + out_tensors: List of output tensors for the graph. Raises: - ValueError: If tag_set does not indicate any meta_graph_def in saved_model, - or signature_key is not in relevant meta_graph_def, - or input shape has None beyond 1st dimension, e.g., (1,None, None, 3), - or given output_arrays are not valid causing empty outputs.
+ ValueError: + SavedModel doesn't contain a MetaGraphDef identified by tag_set. + signature_key is not in the MetaGraphDef. + input_shapes does not match the length of input_arrays. + input_shapes has a None value after the 1st dimension. + input_arrays or output_arrays are not valid. + Unable to load Session. """ + # Set default values for inputs if they are set to None. + if signature_key is None: + signature_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY if tag_set is None: tag_set = set([tag_constants.SERVING]) + if batch_size is None: + batch_size = 1 - meta_graph = get_meta_graph_def(saved_model_dir, tag_set) - signature_def = get_signature_def(meta_graph, signature_key) - inputs, outputs = get_inputs_outputs(signature_def) + # Read SignatureDef. + meta_graph = _get_meta_graph_def(saved_model_dir, tag_set) + signature_def = _get_signature_def(meta_graph, signature_key) + inputs, outputs = _get_inputs_outputs(signature_def) graph = ops.Graph() with session.Session(graph=graph) as sess: - + # TODO(nupurgarg): Throw ValueError if SavedModel has assets/ directory. loader.load(sess, meta_graph.meta_info_def.tags, saved_model_dir) - in_tensors = [graph.get_tensor_by_name(input_) for input_ in inputs] - - # Users can use output_arrays to filter output tensors for conversion. - # If output_arrays is None, we keep all output tensors. In future, we may - # use tflite supported Op list and check whether op is custom Op to - # automatically filter output arrays. - # TODO(zhixianyan): Use tflite supported Op list to filter outputs. - if output_arrays is not None: - output_arrays = output_arrays.split(",") - out_tensors = [ - graph.get_tensor_by_name(output) - for output in outputs - if output.split(":")[0] in output_arrays - ] - else: - out_tensors = [graph.get_tensor_by_name(output) for output in outputs] + # Gets input and output tensors. + # TODO(zhixianyan): Use TFLite supported Op list to filter outputs. 
+ in_tensors = _get_tensors(graph, inputs, input_arrays) + out_tensors = _get_tensors(graph, outputs, output_arrays) - output_names = [node.split(":")[0] for node in outputs] + # Gets fully defined tensor shape. An input tensor with None in the first + # dimension, e.g. (None, 224, 224, 3), is replaced with the batch_size. + # Shapes with None after the first dimension result in a ValueError. + # TODO(zhixianyan): Add supports for input tensor with more None in shape. + for tensor in in_tensors: + if (input_shapes and tensor.name in input_shapes and + input_shapes[tensor.name] is not None): + shape = input_shapes[tensor.name] + else: + shape = tensor.get_shape().as_list() - if not out_tensors: - raise ValueError( - "No valid output tensors for '{}', possible values are '{}'".format( - output_arrays, output_names)) + if None in shape[1:]: + raise ValueError( + "None is only supported in the 1st dimension. Tensor '{0}' has " + "invalid shape '{1}'.".format(tensor.name, shape)) + elif shape[0] is None: + shape[0] = batch_size + tensor.set_shape(shape) + output_names = [node.split(":")[0] for node in outputs] frozen_graph_def = tf_graph_util.convert_variables_to_constants( sess, graph.as_graph_def(), output_names) - # Toco requires fully defined tensor shape, for input tensor with None in - # their shape, e.g., (None, 224, 224, 3), we need to replace first None with - # a given batch size. For shape with more None, e.g. (None, None, None, 3), - # still be able to replace and convert, but require further investigation. - # TODO(zhixianyan): Add supports for input tensor with more None in shape. - for i in range(len(in_tensors)): - shape = in_tensors[i].get_shape().as_list() - if shape[0] is None: - shape[0] = batch_size - if None in shape[1:]: - raise ValueError( - "Only support None shape at 1st dim as batch_size. But tensor " - "'{}' 's shape '{}' has None at other dimension. 
".format( - inputs[i], shape)) - in_tensors[i].set_shape(shape) + return frozen_graph_def, in_tensors, out_tensors + raise ValueError("Unable to load Session.") - result = lite.toco_convert(frozen_graph_def, in_tensors, out_tensors) - if output_tflite is not None: - with gfile.Open(output_tflite, "wb") as f: - f.write(result) - logging.info("Successfully converted to: %s", output_tflite) +def saved_model_to_frozen_graphdef( + saved_model_dir, + output_file_model, + output_file_flags, + input_arrays=None, + input_shapes=None, + output_arrays=None, + tag_set=None, + signature_key=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, + batch_size=1): + """Converts a SavedModel to a frozen graph. Writes graph to tmp directory. - return result + Stores frozen graph and command line flags in the tmp directory. + Args: + saved_model_dir: SavedModel directory to convert. + output_file_model: Full file path to save frozen graph. + output_file_flags: Full file path to save ModelFlags. + input_arrays: List of input tensors to freeze graph with. Uses input arrays + from SignatureDef when none are provided. (default None) + input_shapes: Map of strings representing input tensor names to list of + integers representing input shapes (e.g., {"foo": : [1, 16, 16, 3]}). + Automatically determined when input shapes is None (e.g., {"foo" : None}). + (default None) + output_arrays: List of output tensors to freeze graph with. Uses output + arrays from SignatureDef when none are provided. (default None) + tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to + analyze. All tags in the tag set must be present. (default "serve") + signature_key: Key identifying SignatureDef containing inputs and outputs. + batch_size: Batch size for the model. Replaces the first dimension of an + input size array if undefined. (default 1) + + Returns: None. 
-def main(_): - convert( - saved_model_dir=flags.FLAGS.saved_model_dir, - output_tflite=flags.FLAGS.output_tflite, - output_arrays=flags.FLAGS.output_arrays, - batch_size=flags.FLAGS.batch_size, - tag_set=set(flags.FLAGS.tag_set.split(",")), - signature_key=flags.FLAGS.signature_key) + Raises: + ValueError: Unable to convert to frozen graph. + """ + frozen_graph_def, in_tensors, out_tensors = _freeze_saved_model( + saved_model_dir, input_arrays, input_shapes, output_arrays, tag_set, + signature_key, batch_size) + + # Initialize model flags. + model = model_flags_pb2.ModelFlags() + + for input_tensor in in_tensors: + input_array = model.input_arrays.add() + input_array.name = convert.tensor_name(input_tensor) + input_array.shape.dims.extend(map(int, input_tensor.get_shape())) + + for output_tensor in out_tensors: + model.output_arrays.append(convert.tensor_name(output_tensor)) + + # Write model and ModelFlags to file. ModelFlags contain input array and + # output array information that is parsed from the SignatureDef and used for + # analysis by TOCO. + _write_and_flush_file(output_file_model, frozen_graph_def.SerializeToString()) + _write_and_flush_file(output_file_flags, model.SerializeToString()) + + +def tflite_from_saved_model( + saved_model_dir, + output_file=None, + input_arrays=None, + input_shapes=None, + output_arrays=None, + tag_set=None, + signature_key=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, + batch_size=1, + inference_type=lite_constants.FLOAT, + input_format=lite_constants.TENSORFLOW_GRAPHDEF, + output_format=lite_constants.TFLITE, + quantized_input_stats=None, + drop_control_dependency=True): + """Converts a SavedModel to TFLite FlatBuffer. + Args: + saved_model_dir: SavedModel directory to convert. + output_file: File path to write result TFLite FlatBuffer. + input_arrays: List of input tensors to freeze graph with. Uses input arrays + from SignatureDef when none are provided. 
(default None) + input_shapes: Map of strings representing input tensor names to list of + integers representing input shapes (e.g., {"foo": [1, 16, 16, 3]}). + Automatically determined when input shapes is None (e.g., {"foo": None}). + (default None) + output_arrays: List of output tensors to freeze graph with. Uses output + arrays from SignatureDef when none are provided. (default None) + tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to + analyze. All tags in the tag set must be present. (default "serve") + signature_key: Key identifying SignatureDef containing inputs and outputs. + batch_size: Batch size for the model. Replaces the first dimension of an + input size array if undefined. (default 1) + inference_type: Currently must be `{FLOAT, QUANTIZED_UINT8}`. + input_format: Type of data to read (currently must be TENSORFLOW_GRAPHDEF). + output_format: Type of data to write (currently must be TFLITE or + GRAPHVIZ_DOT) + quantized_input_stats: For each member of input_tensors the mean and + std deviation of training data. Only needed if `inference_type` is + `QUANTIZED_UINT8`. + drop_control_dependency: Drops control dependencies silently. This is due + to tf lite not supporting control dependencies. -if __name__ == "__main__": - app.run(main) + Returns: + The converted data. For example if tflite was the destination, then + this will be a tflite flatbuffer in a bytes array. + + Raises: + ValueError: Unable to convert to frozen graph.
+ """ + frozen_graph_def, in_tensors, out_tensors = _freeze_saved_model( + saved_model_dir, input_arrays, input_shapes, output_arrays, tag_set, + signature_key, batch_size) + + result = convert.toco_convert( + input_data=frozen_graph_def, + input_tensors=in_tensors, + output_tensors=out_tensors, + inference_type=inference_type, + input_format=input_format, + output_format=output_format, + quantized_input_stats=quantized_input_stats, + drop_control_dependency=drop_control_dependency) + + if output_file is not None: + with gfile.Open(output_file, "wb") as f: + f.write(result) + logging.info("Successfully converted to: %s", output_file) + + return result diff --git a/tensorflow/contrib/lite/python/convert_saved_model_test.py b/tensorflow/contrib/lite/python/convert_saved_model_test.py index 734e42d619..db95fc8ad7 100644 --- a/tensorflow/contrib/lite/python/convert_saved_model_test.py +++ b/tensorflow/contrib/lite/python/convert_saved_model_test.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TF Lite SavedModel Conversion test cases. - - - test on generated saved_models from simple graphs (sanity check) - - test mnist savedmodel generated on-the-fly +"""TFLite SavedModel conversion test cases. + - Tests converting simple SavedModel graph to TFLite FlatBuffer. + - Tests converting simple SavedModel graph to frozen graph. + - Tests converting MNIST SavedModel to TFLite FlatBuffer. 
""" from __future__ import absolute_import @@ -25,6 +25,7 @@ from __future__ import print_function import os from tensorflow.contrib.lite.python import convert_saved_model +from tensorflow.contrib.lite.toco import model_flags_pb2 as _model_flags_pb2 from tensorflow.python import keras from tensorflow.python.client import session from tensorflow.python.estimator import estimator_lib as estimator @@ -37,6 +38,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import random_ops from tensorflow.python.ops.losses import losses +from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.saved_model import saved_model from tensorflow.python.training import training as train @@ -45,7 +47,7 @@ from tensorflow.python.training import training as train class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): def _createSimpleSavedModel(self, shape): - """Create a simple savedmodel on the fly.""" + """Create a simple SavedModel on the fly.""" saved_model_dir = os.path.join(self.get_temp_dir(), "simple_savedmodel") with session.Session() as sess: in_tensor = array_ops.placeholder(shape=shape, dtype=dtypes.float32) @@ -56,44 +58,78 @@ class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): return saved_model_dir def testSimpleSavedModel(self): - """Test a simple savedmodel created on the fly.""" - # Create a simple savedmodel + """Test a simple SavedModel created on the fly.""" + # Create a simple SavedModel saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) # Convert to tflite - result = convert_saved_model.convert(saved_model_dir=saved_model_dir) + result = convert_saved_model.tflite_from_saved_model( + saved_model_dir=saved_model_dir) self.assertTrue(result) def testSimpleSavedModelWithNoneBatchSizeInShape(self): - """Test a simple savedmodel, with None in input tensor's shape.""" + """Test a simple SavedModel, with None 
in input tensor's shape.""" saved_model_dir = self._createSimpleSavedModel(shape=[None, 16, 16, 3]) - result = convert_saved_model.convert(saved_model_dir=saved_model_dir) + result = convert_saved_model.tflite_from_saved_model( + saved_model_dir=saved_model_dir) self.assertTrue(result) def testSimpleSavedModelWithMoreNoneInShape(self): - """Test a simple savedmodel, fail as more None in input shape.""" + """Test a simple SavedModel, fail as more None in input shape.""" saved_model_dir = self._createSimpleSavedModel(shape=[None, 16, None, 3]) # Convert to tflite: this should raise ValueError, as 3rd dim is None. with self.assertRaises(ValueError): - convert_saved_model.convert(saved_model_dir=saved_model_dir) + convert_saved_model.tflite_from_saved_model( + saved_model_dir=saved_model_dir) def testSimpleSavedModelWithWrongSignatureKey(self): - """Test a simple savedmodel, fail as given signature is invalid.""" + """Test a simple SavedModel, fail as given signature is invalid.""" saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) # Convert to tflite: this should raise ValueError, as # signature_key does not exit in the saved_model. with self.assertRaises(ValueError): - convert_saved_model.convert( + convert_saved_model.tflite_from_saved_model( saved_model_dir=saved_model_dir, signature_key="wrong-key") def testSimpleSavedModelWithWrongOutputArray(self): - """Test a simple savedmodel, fail as given output_arrays is invalid.""" - # Create a simple savedmodel + """Test a simple SavedModel, fail as given output_arrays is invalid.""" + # Create a simple SavedModel saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) # Convert to tflite: this should raise ValueError, as # output_arrays is not valid for the saved_model. 
with self.assertRaises(ValueError): - convert_saved_model.convert( - saved_model_dir=saved_model_dir, output_arrays="wrong-output") + convert_saved_model.tflite_from_saved_model( + saved_model_dir=saved_model_dir, output_arrays=["wrong-output"]) + + def testSimpleSavedModelWithWrongInputArrays(self): + """Test a simple SavedModel, fail as given input_arrays is invalid.""" + saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) + # Checks invalid input_arrays. + with self.assertRaises(ValueError): + convert_saved_model.tflite_from_saved_model( + saved_model_dir=saved_model_dir, input_arrays=["wrong-input"]) + # Checks valid and invalid input_arrays. + with self.assertRaises(ValueError): + convert_saved_model.tflite_from_saved_model( + saved_model_dir=saved_model_dir, + input_arrays=["Placeholder", "wrong-input"]) + + def testSimpleSavedModelWithCorrectArrays(self): + """Test a simple SavedModel, with correct input_arrays and output_arrays.""" + saved_model_dir = self._createSimpleSavedModel(shape=[None, 16, 16, 3]) + result = convert_saved_model.tflite_from_saved_model( + saved_model_dir=saved_model_dir, + input_arrays=["Placeholder"], + output_arrays=["add"]) + self.assertTrue(result) + + def testSimpleSavedModelWithCorrectInputArrays(self): + """Test a simple SavedModel, with correct input_arrays and input_shapes.""" + saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) + result = convert_saved_model.tflite_from_saved_model( + saved_model_dir=saved_model_dir, + input_arrays=["Placeholder"], + input_shapes={"Placeholder": [1, 16, 16, 3]}) + self.assertTrue(result) def testMultipleMetaGraphDef(self): """Test saved model with multiple MetaGraphDef.""" @@ -119,20 +155,103 @@ class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): sess, tags=[saved_model.tag_constants.SERVING, "additional_test_tag"], signature_def_map=signature_def_map) + # MetaGraphDef 2 builder.add_meta_graph(tags=["tflite"]) builder.save(True) # Convert 
to tflite - convert_saved_model.convert( + convert_saved_model.tflite_from_saved_model( saved_model_dir=saved_model_dir, tag_set=set([saved_model.tag_constants.SERVING, "additional_test_tag"])) +class ConvertSavedModelTestBasicGraphToText(test_util.TensorFlowTestCase): + + def _createSimpleSavedModel(self, shape): + """Create a simple SavedModel.""" + saved_model_dir = os.path.join(self.get_temp_dir(), "simple_savedmodel") + with session.Session() as sess: + in_tensor_1 = array_ops.placeholder( + shape=shape, dtype=dtypes.float32, name="inputB") + in_tensor_2 = array_ops.placeholder( + shape=shape, dtype=dtypes.float32, name="inputA") + out_tensor = in_tensor_1 + in_tensor_2 + inputs = {"x": in_tensor_1, "y": in_tensor_2} + outputs = {"z": out_tensor} + saved_model.simple_save(sess, saved_model_dir, inputs, outputs) + return saved_model_dir + + def _getInputArrayNames(self, model_proto): + return [data.name for data in model_proto.input_arrays] + + def _getInputArrayShapes(self, model_proto): + return [ + [dim for dim in data.shape.dims] for data in model_proto.input_arrays + ] + + def _get_model_flags_proto_from_file(self, filename): + proto = _model_flags_pb2.ModelFlags() + with gfile.Open(filename, "rb") as output_file: + proto.ParseFromString(output_file.read()) + output_file.close() + return proto + + def testSimpleSavedModel(self): + """Test a simple SavedModel.""" + saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) + output_file_model = os.path.join(self.get_temp_dir(), "model.pb") + output_file_flags = os.path.join(self.get_temp_dir(), "model.pbtxt") + + convert_saved_model.saved_model_to_frozen_graphdef( + saved_model_dir=saved_model_dir, + output_file_model=output_file_model, + output_file_flags=output_file_flags, + input_arrays=["inputB", "inputA"]) + + proto = self._get_model_flags_proto_from_file(output_file_flags) + self.assertEqual(proto.output_arrays, ["add"]) + self.assertEqual(self._getInputArrayNames(proto), ["inputA", 
"inputB"]) + self.assertEqual( + self._getInputArrayShapes(proto), [[1, 16, 16, 3], [1, 16, 16, 3]]) + + def testSimpleSavedModelWithDifferentInputNames(self): + """Test a simple SavedModel.""" + saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) + output_file_model = os.path.join(self.get_temp_dir(), "model.pb") + output_file_flags = os.path.join(self.get_temp_dir(), "model.pbtxt") + + # Check case where input shape is given. + convert_saved_model.saved_model_to_frozen_graphdef( + saved_model_dir=saved_model_dir, + output_file_model=output_file_model, + output_file_flags=output_file_flags, + input_arrays=["inputA"], + input_shapes={"inputA": [1, 16, 16, 3]}) + + proto = self._get_model_flags_proto_from_file(output_file_flags) + self.assertEqual(proto.output_arrays, ["add"]) + self.assertEqual(self._getInputArrayNames(proto), ["inputA"]) + self.assertEqual(self._getInputArrayShapes(proto), [[1, 16, 16, 3]]) + + # Check case where input shape is None. + convert_saved_model.saved_model_to_frozen_graphdef( + saved_model_dir=saved_model_dir, + output_file_model=output_file_model, + output_file_flags=output_file_flags, + input_arrays=["inputA"], + input_shapes={"inputA": None}) + + proto = self._get_model_flags_proto_from_file(output_file_flags) + self.assertEqual(proto.output_arrays, ["add"]) + self.assertEqual(self._getInputArrayNames(proto), ["inputA"]) + self.assertEqual(self._getInputArrayShapes(proto), [[1, 16, 16, 3]]) + + class Model(keras.Model): """Model to recognize digits in the MNIST dataset. 
- Train and export savedmodel, used for testOnflyTrainMnistSavedModel + Train and export SavedModel, used for testOnflyTrainMnistSavedModel Network structure is equivalent to: https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/examples/tutorials/mnist/mnist_deep.py @@ -238,7 +357,7 @@ def dummy_input_fn(): class ConvertSavedModelTestTrainGraph(test_util.TensorFlowTestCase): def testTrainedMnistSavedModel(self): - """Test mnist savedmodel, trained with dummy data and small steps.""" + """Test mnist SavedModel, trained with dummy data and small steps.""" # Build classifier classifier = estimator.Estimator( model_fn=model_fn, @@ -253,21 +372,20 @@ class ConvertSavedModelTestTrainGraph(test_util.TensorFlowTestCase): "image": image, }) - # Export savedmodel + # Export SavedModel saved_model_dir = os.path.join(self.get_temp_dir(), "mnist_savedmodel") classifier.export_savedmodel(saved_model_dir, pred_input_fn) # Convert to tflite and test output saved_model_name = os.listdir(saved_model_dir)[0] saved_model_final_dir = os.path.join(saved_model_dir, saved_model_name) - output_tflite = os.path.join(saved_model_dir, - saved_model_final_dir + ".lite") + output_file = os.path.join(saved_model_dir, saved_model_final_dir + ".lite") # TODO(zhixianyan): no need to limit output_arrays to `Softmax' # once b/74205001 fixed and argmax implemented in tflite. - result = convert_saved_model.convert( + result = convert_saved_model.tflite_from_saved_model( saved_model_dir=saved_model_final_dir, - output_arrays="Softmax", - output_tflite=output_tflite) + output_arrays=["Softmax"], + output_file=output_file) self.assertTrue(result) diff --git a/tensorflow/contrib/lite/python/convert_saved_model_to_frozen_graph.py b/tensorflow/contrib/lite/python/convert_saved_model_to_frozen_graph.py new file mode 100644 index 0000000000..4d9782f4a6 --- /dev/null +++ b/tensorflow/contrib/lite/python/convert_saved_model_to_frozen_graph.py @@ -0,0 +1,106 @@ +# Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Python console command for generating frozen models from SavedModels. + +This exists to add SavedModel compatibility to TOCO. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +from tensorflow.contrib.lite.python.convert_saved_model import saved_model_to_frozen_graphdef +from tensorflow.python.platform import app + +FLAGS = None + + +def execute(unused_args): + """Calls function to convert the SavedModel to a frozen graph.""" + # Error handling. + if FLAGS.input_shapes and not FLAGS.input_arrays: + raise ValueError("Input shapes requires input arrays to be specified.") + + # Calls saved_model_to_frozen_graphdef function to generate frozen graph. 
+ input_arrays = (FLAGS.input_arrays.split(",") if FLAGS.input_arrays else None) + input_shapes = None + if FLAGS.input_shapes: + input_shapes = { + input_arrays[idx]: shape.split(",") + for idx, shape in enumerate(FLAGS.input_shapes.split(":")) + } + output_arrays = ( + FLAGS.output_arrays.split(",") if FLAGS.output_arrays else None) + tag_set = set(FLAGS.tag_set.split(",")) if FLAGS.tag_set else None + + saved_model_to_frozen_graphdef( + saved_model_dir=FLAGS.saved_model_directory, + output_file_model=FLAGS.output_file_model, + output_file_flags=FLAGS.output_file_flags, + input_arrays=input_arrays, + input_shapes=input_shapes, + output_arrays=output_arrays, + tag_set=tag_set, + signature_key=FLAGS.signature_key, + batch_size=FLAGS.batch_size) + + +def main(): + global FLAGS + # Parses flags. + parser = argparse.ArgumentParser( + description="Invoke SavedModel to frozen model converter.") + parser.add_argument( + "saved_model_directory", + type=str, + help="Full path to directory containing the SavedModel.") + parser.add_argument( + "output_file_model", + type=str, + help="Full file path to save frozen graph.") + parser.add_argument( + "output_file_flags", type=str, help="Full file path to save ModelFlags.") + parser.add_argument( + "--input_arrays", + type=str, + help="Name of the input arrays, comma-separated.") + parser.add_argument( + "--input_shapes", + type=str, + help="Shapes corresponding to --input_arrays, colon-separated.") + parser.add_argument( + "--output_arrays", + type=str, + help="Name of the output arrays, comma-separated.") + parser.add_argument( + "--tag_set", type=str, help="Name of output arrays, comma-separated.") + parser.add_argument( + "--signature_key", + type=str, + help="Key identifying SignatureDef containing inputs and outputs.") + parser.add_argument( + "--batch_size", + type=int, + help="Batch size for the model. 
Replaces the first dimension of an " + "input size array if undefined.") + + FLAGS, unparsed = parser.parse_known_args() + + app.run(main=execute, argv=[sys.argv[0]] + unparsed) + + +if __name__ == "__main__": + main() diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/convert_test.py similarity index 82% rename from tensorflow/contrib/lite/python/lite_test.py rename to tensorflow/contrib/lite/python/convert_test.py index b8b4510188..dc21a9b669 100644 --- a/tensorflow/contrib/lite/python/lite_test.py +++ b/tensorflow/contrib/lite/python/convert_test.py @@ -17,8 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.lite.python import lite -from tensorflow.contrib.lite.python.op_hint import _tensor_name_base as _tensor_name_base +from tensorflow.contrib.lite.python import convert +from tensorflow.contrib.lite.python import lite_constants +from tensorflow.contrib.lite.python import op_hint from tensorflow.python.client import session from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util @@ -29,7 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.platform import test -class LiteTest(test_util.TensorFlowTestCase): +class ConvertTest(test_util.TensorFlowTestCase): def testBasic(self): in_tensor = array_ops.placeholder(shape=[1, 16, 16, 3], @@ -37,13 +38,13 @@ class LiteTest(test_util.TensorFlowTestCase): out_tensor = in_tensor + in_tensor sess = session.Session() # Try running on valid graph - result = lite.toco_convert(sess.graph_def, [in_tensor], [out_tensor]) + result = convert.toco_convert(sess.graph_def, [in_tensor], [out_tensor]) self.assertTrue(result) # TODO(aselle): remove tests that fail (we must get TOCO to not fatal # all the time). 
# Try running on identity graph (known fail) # with self.assertRaisesRegexp(RuntimeError, "!model->operators.empty()"): - # result = lite.toco_convert(sess.graph_def, [in_tensor], [in_tensor]) + # result = convert.toco_convert(sess.graph_def, [in_tensor], [in_tensor]) def testQuantization(self): in_tensor = array_ops.placeholder(shape=[1, 16, 16, 3], @@ -51,13 +52,14 @@ class LiteTest(test_util.TensorFlowTestCase): out_tensor = array_ops.fake_quant_with_min_max_args(in_tensor + in_tensor, min=0., max=1.) sess = session.Session() - result = lite.toco_convert(sess.graph_def, [in_tensor], [out_tensor], - inference_type=lite.QUANTIZED_UINT8, - quantized_input_stats=[(0., 1.)]) + result = convert.toco_convert( + sess.graph_def, [in_tensor], [out_tensor], + inference_type=lite_constants.QUANTIZED_UINT8, + quantized_input_stats=[(0., 1.)]) self.assertTrue(result) -class LiteTestOpHint(test_util.TensorFlowTestCase): +class ConvertTestOpHint(test_util.TensorFlowTestCase): """Test the hint to stub functionality.""" def _getGraphOpTypes(self, graphdef, output_nodes): @@ -99,7 +101,7 @@ class LiteTestOpHint(test_util.TensorFlowTestCase): swish_scale = array_ops.constant(1.0) def _swish(input_tensor, scale): - custom = lite.OpHint("cool_activation") + custom = op_hint.OpHint("cool_activation") input_tensor, scale = custom.add_inputs(input_tensor, scale) output = math_ops.sigmoid(input_tensor) * input_tensor * scale output, = custom.add_outputs(output) @@ -111,11 +113,12 @@ class LiteTestOpHint(test_util.TensorFlowTestCase): # and 1 final output). 
self.assertEqual(self._countIdentities(sess.graph_def.node), 4) - stubbed_graphdef = lite.convert_op_hints_to_stubs(sess) + stubbed_graphdef = op_hint.convert_op_hints_to_stubs(sess) self.assertCountEqual( self._getGraphOpTypes( - stubbed_graphdef, output_nodes=[_tensor_name_base(output)]), + stubbed_graphdef, + output_nodes=[op_hint._tensor_name_base(output)]), ["cool_activation", "Const", "Identity"]) def testScaleAndBiasAndIdentity(self): @@ -125,7 +128,7 @@ class LiteTestOpHint(test_util.TensorFlowTestCase): b = array_ops.constant([4., 5.]) def _scaled_and_bias_and_identity(a, x, b): - custom = lite.OpHint("scale_and_bias_and_identity") + custom = op_hint.OpHint("scale_and_bias_and_identity") a, x, b = custom.add_inputs(a, x, b) return custom.add_outputs(a * x + b, x) output = array_ops.identity(_scaled_and_bias_and_identity(a, x, b), @@ -136,11 +139,12 @@ class LiteTestOpHint(test_util.TensorFlowTestCase): # +1 for the final output self.assertEqual(self._countIdentities(sess.graph_def.node), 6) - stubbed_graphdef = lite.convert_op_hints_to_stubs(sess) + stubbed_graphdef = op_hint.convert_op_hints_to_stubs(sess) self.assertCountEqual( self._getGraphOpTypes( - stubbed_graphdef, output_nodes=[_tensor_name_base(output)]), + stubbed_graphdef, + output_nodes=[op_hint._tensor_name_base(output)]), ["scale_and_bias_and_identity", "Const", "Identity", "Pack"]) def testTwoFunctions(self): @@ -148,7 +152,7 @@ class LiteTestOpHint(test_util.TensorFlowTestCase): a = array_ops.constant([1.]) b = array_ops.constant([1.]) def _double_values(x): - custom = lite.OpHint("add_test") + custom = op_hint.OpHint("add_test") x = custom.add_inputs(x) output = math_ops.multiply(x, x) output, = custom.add_outputs(output) @@ -160,10 +164,11 @@ class LiteTestOpHint(test_util.TensorFlowTestCase): # make sure one identity for each input (2) and output (2) => 2 + 2 # +1 for the final output self.assertEqual(self._countIdentities(sess.graph_def.node), 5) - stubbed_graphdef = 
lite.convert_op_hints_to_stubs(sess) + stubbed_graphdef = op_hint.convert_op_hints_to_stubs(sess) self.assertCountEqual( self._getGraphOpTypes( - stubbed_graphdef, output_nodes=[_tensor_name_base(output)]), + stubbed_graphdef, + output_nodes=[op_hint._tensor_name_base(output)]), ["add_test", "Const", "Identity", "Add"]) diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index cf50f9d4d6..4ea40201f7 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -18,6 +18,7 @@ EXPERIMENTAL: APIs here are unstable and likely to change without notice. @@toco_convert @@toco_convert_protos +@@tflite_from_saved_model @@OpHint @@convert_op_hints_to_stubs @@ -25,208 +26,11 @@ EXPERIMENTAL: APIs here are unstable and likely to change without notice. from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os as _os -import subprocess as _subprocess -import tempfile as _tempfile # pylint: disable=unused-import +from tensorflow.contrib.lite.python.convert import toco_convert +from tensorflow.contrib.lite.python.convert import toco_convert_protos +from tensorflow.contrib.lite.python.convert_saved_model import tflite_from_saved_model from tensorflow.contrib.lite.python.op_hint import convert_op_hints_to_stubs from tensorflow.contrib.lite.python.op_hint import OpHint # pylint: enable=unused-import -from tensorflow.contrib.lite.toco import model_flags_pb2 as _model_flags_pb2 -from tensorflow.contrib.lite.toco import toco_flags_pb2 as _toco_flags_pb2 -from tensorflow.contrib.lite.toco import types_pb2 as _types_pb2 -from tensorflow.python.framework import dtypes as _dtypes -from tensorflow.python.platform import resource_loader as _resource_loader -from tensorflow.python.util.all_util import remove_undocumented -from tensorflow.python.util.lazy_loader import LazyLoader - -# Lazy load since some of the performance benchmark skylark rules -# break 
dependencies. -_toco_python = LazyLoader( - "tensorflow_wrap_toco", globals(), - "tensorflow.contrib.lite.toco.python." - "tensorflow_wrap_toco") -del LazyLoader - -# Enum types from the protobuf promoted to the API -FLOAT = _types_pb2.FLOAT -INT32 = _types_pb2.INT32 -INT64 = _types_pb2.INT64 -STRING = _types_pb2.STRING -QUANTIZED_UINT8 = _types_pb2.QUANTIZED_UINT8 -TENSORFLOW_GRAPHDEF = _toco_flags_pb2.TENSORFLOW_GRAPHDEF -TFLITE = _toco_flags_pb2.TFLITE -GRAPHVIZ_DOT = _toco_flags_pb2.GRAPHVIZ_DOT - -# Currently the default mode of operation is to shell to another python process -# to protect against crashes. However, it breaks some dependent targets because -# it forces us to depend on an external py_binary. The experimental API doesn't -# have that drawback. -EXPERIMENTAL_USE_TOCO_API_DIRECTLY = False - -# Find the toco_from_protos binary using the resource loader if using from -# bazel, otherwise we are in a pip where console_scripts already has -# the toco_from_protos tool. -if EXPERIMENTAL_USE_TOCO_API_DIRECTLY: - _toco_from_proto_bin = "" -else: - _toco_from_proto_bin = _resource_loader.get_path_to_datafile( - "../toco/python/toco_from_protos") - -if _toco_from_proto_bin and not _os.path.exists(_toco_from_proto_bin): - _toco_from_proto_bin = "toco_from_protos" - - -def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): - """Convert `input_data_str` according to model and toco parameters. - - Unless you know what you are doing consider using - the more friendly @{tf.contrib.lite.toco_convert}}. - - Args: - model_flags_str: Serialized proto describing model properties, see - `toco/model_flags.proto`. - toco_flags_str: Serialized proto describing conversion properties, see - `toco/toco_flags.proto`. - input_data_str: Input data in serialized form (e.g. a graphdef is common) - Returns: - Converted model in serialized form (e.g. a TFLITE model is common). 
- Raises: - RuntimeError: When conversion fails, an exception is raised with the error - message embedded. - """ - # TODO(aselle): When toco does not use fatal errors for failure, we can - # switch this on. - if not _toco_from_proto_bin: - return _toco_python.TocoConvert( - model_flags_str, toco_flags_str, input_data_str) - - with _tempfile.NamedTemporaryFile() as fp_toco, \ - _tempfile.NamedTemporaryFile() as fp_model, \ - _tempfile.NamedTemporaryFile() as fp_input, \ - _tempfile.NamedTemporaryFile() as fp_output: - fp_model.write(model_flags_str) - fp_toco.write(toco_flags_str) - fp_input.write(input_data_str) - fp_model.flush() - fp_toco.flush() - fp_input.flush() - - cmd = [ - _toco_from_proto_bin, fp_model.name, fp_toco.name, fp_input.name, - fp_output.name - ] - cmdline = " ".join(cmd) - proc = _subprocess.Popen( - cmdline, - shell=True, - stdout=_subprocess.PIPE, - stderr=_subprocess.STDOUT, - close_fds=True) - stdout, stderr = proc.communicate() - exitcode = proc.returncode - if exitcode == 0: - stuff = fp_output.read() - return stuff - else: - raise RuntimeError("TOCO failed see console for info.\n%s\n%s\n" % - (stdout, stderr)) - - -def _tensor_name(x): - return x.name.split(":")[0] - - -def toco_convert(input_data, - input_tensors, - output_tensors, - inference_type=FLOAT, - input_format=TENSORFLOW_GRAPHDEF, - output_format=TFLITE, - quantized_input_stats=None, - drop_control_dependency=True, - allow_custom_ops=None): - """Convert a model using TOCO from `input_format` to `output_format`. - - Typically this is to convert from TensorFlow GraphDef to TFLite, in which - case the default `input_format` and `output_format` are sufficient. - - Args: - input_data: Input data (i.e. often `sess.graph_def`). - input_tensors: List of input tensors. Type and shape are computed using - `foo.get_shape()` and `foo.dtype`. - output_tensors: List of output tensors (only .name is used from this). - inference_type: Currently must be `{FLOAT, QUANTIZED_UINT8}`. 
- input_format: Type of data to read (currently must be TENSORFLOW_GRAPHDEF). - output_format: Type of data to write (currently must be TFLITE or - GRAPHVIZ_DOT) - quantized_input_stats: For each member of input_tensors the mean and - std deviation of training data. Only needed if `inference_type` is - `QUANTIZED_UINT8`. - drop_control_dependency: Drops control dependencies silently. This is due - to tf lite not supporting control dependencies. - - Returns: - The converted data. For example if tflite was the destination, then - this will be a tflite flatbuffer in a bytes array. - - Raises: - ValueError: If the input tensor type is unknown - RuntimeError: If TOCO fails to convert (in which case the runtime error's - error text will contain the TOCO error log) - """ - toco = _toco_flags_pb2.TocoFlags() - toco.input_format = input_format - toco.output_format = output_format - toco.inference_type = inference_type - toco.drop_control_dependency = drop_control_dependency - if allow_custom_ops is not None: - toco.allow_custom_ops = allow_custom_ops - - model = _model_flags_pb2.ModelFlags() - for idx, input_tensor in enumerate(input_tensors): - if input_tensor.dtype == _dtypes.float32: - tflite_input_type = FLOAT - elif input_tensor.dtype == _dtypes.int32: - tflite_input_type = INT32 - elif input_tensor.dtype == _dtypes.int64: - tflite_input_type = INT64 - # TODO(aselle): Insert strings when they are available - else: - raise ValueError("Tensors %s not known type %r" % (input_tensor.name, - input_tensor.dtype)) - - input_array = model.input_arrays.add() - - if inference_type == QUANTIZED_UINT8: - if tflite_input_type == FLOAT: - tflite_input_type = QUANTIZED_UINT8 - input_array.mean_value, input_array.std_value = quantized_input_stats[idx] - - input_array.name = _tensor_name(input_tensor) - input_array.shape.dims.extend(map(int, input_tensor.get_shape())) - - for output_tensor in output_tensors: - model.output_arrays.append(_tensor_name(output_tensor)) - - # TODO(aselle): 
Consider handling the case of allowing quantized - # inputs to be converted to float (via the toco.inference_input_type field). - data = toco_convert_protos(model.SerializeToString(), - toco.SerializeToString(), - input_data.SerializeToString()) - return data - - -_allowed_symbols = [ - "FLOAT", - "INT32", - "INT64", - "STRING", - "QUANTIZED_UINT8", - "TENSORFLOW_GRAPHDEF", - "TFLITE", - "GRAPHVIZ_DOT", - "EXPERIMENTAL_USE_TOCO_API_DIRECTLY", -] -remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/lite/python/lite_constants.py b/tensorflow/contrib/lite/python/lite_constants.py new file mode 100644 index 0000000000..195d7a732f --- /dev/null +++ b/tensorflow/contrib/lite/python/lite_constants.py @@ -0,0 +1,53 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Constants for TFLite.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.lite.toco import toco_flags_pb2 as _toco_flags_pb2 +from tensorflow.contrib.lite.toco import types_pb2 as _types_pb2 +from tensorflow.python.util.all_util import remove_undocumented + +# Enum types from the protobuf promoted to the API +FLOAT = _types_pb2.FLOAT +INT32 = _types_pb2.INT32 +INT64 = _types_pb2.INT64 +STRING = _types_pb2.STRING +QUANTIZED_UINT8 = _types_pb2.QUANTIZED_UINT8 +TENSORFLOW_GRAPHDEF = _toco_flags_pb2.TENSORFLOW_GRAPHDEF +TFLITE = _toco_flags_pb2.TFLITE +GRAPHVIZ_DOT = _toco_flags_pb2.GRAPHVIZ_DOT + +# Currently the default mode of operation is to shell to another python process +# to protect against crashes. However, it breaks some dependent targets because +# it forces us to depend on an external py_binary. The experimental API doesn't +# have that drawback. +EXPERIMENTAL_USE_TOCO_API_DIRECTLY = False + + +_allowed_symbols = [ + "FLOAT", + "INT32", + "INT64", + "STRING", + "QUANTIZED_UINT8", + "TENSORFLOW_GRAPHDEF", + "TFLITE", + "GRAPHVIZ_DOT", + "EXPERIMENTAL_USE_TOCO_API_DIRECTLY", +] +remove_undocumented(__name__, _allowed_symbols) -- GitLab From ecd837fd0ab69cf54d920eae3b1c73602be6c626 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 23 Apr 2018 17:14:16 -0700 Subject: [PATCH 181/434] [TF:XLA] Add a kernel for PlaceholderWithDefault PiperOrigin-RevId: 194010395 --- tensorflow/compiler/tests/BUILD | 12 +++++ tensorflow/compiler/tests/placeholder_test.py | 48 +++++++++++++++++++ .../compiler/tf2xla/kernels/identity_op.cc | 1 + 3 files changed, 61 insertions(+) create mode 100644 tensorflow/compiler/tests/placeholder_test.py diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index ac2441cea0..0c72093256 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -923,3 +923,15 @@ tf_xla_py_test( "//tensorflow/python:platform_test", ], ) + +tf_xla_py_test( + name = "placeholder_test", + size = "small", + srcs = ["placeholder_test.py"], + deps = [ + ":xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:platform_test", + ], +) diff --git a/tensorflow/compiler/tests/placeholder_test.py b/tensorflow/compiler/tests/placeholder_test.py new file mode 100644 index 0000000000..5e6d1313bd --- /dev/null +++ b/tensorflow/compiler/tests/placeholder_test.py @@ -0,0 +1,48 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for xla handling of placeholder_with_default.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import googletest + + +class PlaceholderTest(XLATestCase): + + def test_placeholder_with_default_default(self): + with self.test_session() as sess, self.test_scope(): + v = resource_variable_ops.ResourceVariable(4.0) + ph = array_ops.placeholder_with_default(v, shape=[]) + out = ph * 2 + sess.run(variables.variables_initializer([v])) + self.assertEqual(8.0, sess.run(out)) + + def test_placeholder_with_default_fed(self): + with self.test_session() as sess, self.test_scope(): + v = resource_variable_ops.ResourceVariable(4.0) + ph = array_ops.placeholder_with_default(v, shape=[]) + out = ph * 2 + sess.run(variables.variables_initializer([v])) + self.assertEqual(2.0, sess.run(out, {ph: 1.0})) + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/compiler/tf2xla/kernels/identity_op.cc b/tensorflow/compiler/tf2xla/kernels/identity_op.cc index 39af662b63..e72200bfbc 100644 --- a/tensorflow/compiler/tf2xla/kernels/identity_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/identity_op.cc @@ -38,6 +38,7 @@ class IdentityOp : public XlaOpKernel { REGISTER_XLA_OP(Name("Identity").CompilationOnly(), IdentityOp); REGISTER_XLA_OP(Name("IdentityN").CompilationOnly(), IdentityOp); +REGISTER_XLA_OP(Name("PlaceholderWithDefault"), IdentityOp); REGISTER_XLA_OP(Name("PreventGradient"), IdentityOp); REGISTER_XLA_OP(Name("StopGradient"), IdentityOp); REGISTER_XLA_OP(Name("Snapshot"), IdentityOp); -- GitLab From 80fc661853f9a0844faf95eb68438dc85a5879e3 Mon Sep 17 00:00:00 2001 From: 
Justin Lebar Date: Mon, 23 Apr 2018 17:16:55 -0700 Subject: [PATCH 182/434] Use tensorflow::se instead of perftools::gputools for StreamExecutor. PiperOrigin-RevId: 194010749 --- tensorflow/compiler/aot/compile.cc | 5 +- .../compiler/jit/kernels/xla_launch_op.cc | 12 ++-- .../compiler/jit/kernels/xla_launch_op.h | 2 +- .../compiler/jit/xla_compile_on_demand_op.cc | 2 +- tensorflow/compiler/jit/xla_device.cc | 2 - tensorflow/compiler/jit/xla_device.h | 13 ++-- tensorflow/compiler/jit/xla_device_context.cc | 2 - tensorflow/compiler/jit/xla_device_context.h | 15 ++--- tensorflow/compiler/jit/xla_launch_util.cc | 26 ++++---- tensorflow/compiler/jit/xla_launch_util.h | 13 ++-- tensorflow/compiler/jit/xla_tensor.cc | 9 ++- tensorflow/compiler/jit/xla_tensor.h | 3 +- .../fused_conv2d_bias_activation_op.cc | 2 +- .../kernels/adjust_hsv_in_yiq_op_gpu.cu.cc | 2 +- .../mpi_collectives/kernels/mpi_ops.cc | 2 +- tensorflow/contrib/mpi_collectives/mpi_ops.cc | 2 +- .../contrib/nccl/kernels/nccl_manager.cc | 56 ++++++++--------- .../contrib/nccl/kernels/nccl_manager.h | 36 +++++------ .../contrib/nccl/kernels/nccl_manager_test.cc | 8 +-- tensorflow/contrib/rnn/kernels/blas_gemm.cc | 11 ++-- .../contrib/tensorrt/kernels/trt_engine_op.cc | 1 - .../common_runtime/gpu/gpu_bfc_allocator.h | 8 +-- .../gpu/gpu_cudamalloc_allocator.h | 2 +- .../common_runtime/gpu/gpu_debug_allocator.cc | 6 +- .../common_runtime/gpu/gpu_debug_allocator.h | 4 +- .../core/common_runtime/gpu/gpu_device.cc | 5 +- .../core/common_runtime/gpu/gpu_event_mgr.cc | 22 +++---- .../core/common_runtime/gpu/gpu_event_mgr.h | 30 ++++----- .../common_runtime/gpu/gpu_event_mgr_test.cc | 19 +++--- .../core/common_runtime/gpu/gpu_init.cc | 8 +-- .../core/common_runtime/gpu/gpu_util.cc | 20 +++--- tensorflow/core/common_runtime/gpu/gpu_util.h | 5 +- .../core/common_runtime/gpu/pool_allocator.h | 4 +- .../common_runtime/gpu/pool_allocator_test.cc | 32 +++++----- .../core/common_runtime/gpu_device_context.h | 4 +- 
tensorflow/core/grappler/devices.cc | 12 ++-- tensorflow/core/kernels/avgpooling_op.cc | 24 +++---- .../core/kernels/batch_matmul_op_impl.h | 44 ++++++------- tensorflow/core/kernels/bias_op.cc | 4 +- tensorflow/core/kernels/check_numerics_op.cc | 6 +- .../core/kernels/conv_grad_filter_ops.cc | 32 +++++----- .../core/kernels/conv_grad_input_ops.cc | 28 ++++----- tensorflow/core/kernels/conv_grad_ops_3d.cc | 62 +++++++++---------- tensorflow/core/kernels/conv_ops.cc | 24 +++---- tensorflow/core/kernels/conv_ops_3d.cc | 26 ++++---- tensorflow/core/kernels/conv_ops_gpu.h | 26 ++++---- tensorflow/core/kernels/crop_and_resize_op.cc | 8 +-- tensorflow/core/kernels/cuda_device_array.h | 2 +- tensorflow/core/kernels/cuda_solvers.cc | 6 +- tensorflow/core/kernels/cuda_solvers.h | 2 +- tensorflow/core/kernels/cudnn_pooling_gpu.cc | 42 ++++++------- tensorflow/core/kernels/cudnn_pooling_gpu.h | 4 +- tensorflow/core/kernels/cudnn_rnn_ops.cc | 52 ++++++++-------- .../core/kernels/depthwise_conv_op_gpu.cu.cc | 3 +- .../kernels/dynamic_partition_op_gpu.cu.cc | 4 +- tensorflow/core/kernels/fft_ops.cc | 33 +++++----- .../core/kernels/fused_batch_norm_op.cc | 22 +++---- tensorflow/core/kernels/gpu_utils.h | 8 +-- tensorflow/core/kernels/lrn_op.cc | 12 ++-- tensorflow/core/kernels/matmul_op.cc | 51 +++++++-------- .../kernels/matrix_triangular_solve_op.cc | 31 +++++----- tensorflow/core/kernels/maxpooling_op.cc | 20 +++--- tensorflow/core/kernels/pooling_ops_3d.cc | 23 +++---- tensorflow/core/kernels/pooling_ops_common.cc | 46 +++++++------- .../core/kernels/pooling_ops_common_gpu.h | 4 +- .../core/kernels/segment_reduction_ops.cc | 4 +- tensorflow/core/kernels/where_op.cc | 5 +- .../platform/default/gpu/cupti_wrapper.cc | 42 ++++++------- tensorflow/core/platform/types.h | 4 +- 69 files changed, 509 insertions(+), 600 deletions(-) diff --git a/tensorflow/compiler/aot/compile.cc b/tensorflow/compiler/aot/compile.cc index 7c83387881..e17a7c4bf6 100644 --- 
a/tensorflow/compiler/aot/compile.cc +++ b/tensorflow/compiler/aot/compile.cc @@ -88,9 +88,8 @@ Status CompileGraph(const GraphDef& graph_def, const tf2xla::Config& config, // Converts the graph into an XLA computation, and compiles the // computation. // TODO(toddw): Should we let the user pick the XLA cpu vs. gpu client? - namespace gpu = perftools::gputools; - gpu::Platform* cpu_platform = - gpu::MultiPlatformManager::PlatformWithName("Host").ValueOrDie(); + se::Platform* cpu_platform = + se::MultiPlatformManager::PlatformWithName("Host").ValueOrDie(); xla::CompileOnlyClient* client = xla::ClientLibrary::GetOrCreateCompileOnlyClient(cpu_platform) .ValueOrDie(); diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc index f48941fce3..03ae09ee8b 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc @@ -37,8 +37,6 @@ limitations under the License. #include "tensorflow/core/platform/stream_executor_no_cuda.h" #include "tensorflow/core/util/stream_executor_util.h" -namespace gpu = perftools::gputools; - namespace tensorflow { XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx) @@ -51,9 +49,9 @@ XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx) num_constant_args_ = constant_types.size(); OP_REQUIRES_OK(ctx, ctx->GetAttr("Nresources", &num_resource_args_)); if (device_type_ == DeviceType(DEVICE_CPU)) { - platform_id_ = gpu::host::kHostPlatformId; + platform_id_ = se::host::kHostPlatformId; } else if (device_type_ == DeviceType(DEVICE_GPU)) { - platform_id_ = gpu::cuda::kCudaPlatformId; + platform_id_ = se::cuda::kCudaPlatformId; } else { platform_id_ = nullptr; } @@ -69,7 +67,7 @@ Status XlaLocalLaunchOp::BuildCompilationCache(OpKernelContext* ctx, return Status::OK(); } - auto platform = gpu::MultiPlatformManager::PlatformWithId(platform_id_); + auto platform = se::MultiPlatformManager::PlatformWithId(platform_id_); if 
(!platform.ok()) { return StreamExecutorUtil::ConvertStatus(platform.status()); } @@ -100,7 +98,7 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { ResourceMgr* rm = ctx->resource_manager(); OP_REQUIRES(ctx, rm, errors::Internal("No resource manager.")); - gpu::Stream* stream = + se::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; XlaCompilationCache* cache; @@ -153,7 +151,7 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { options.device_type = &cache->device_type(); options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition(); options.graph_def_version = ctx->function_library()->graph_def_version(); - options.allow_cpu_custom_calls = (platform_id_ == gpu::host::kHostPlatformId); + options.allow_cpu_custom_calls = (platform_id_ == se::host::kHostPlatformId); options.device_allocator = xla_allocator; // TODO(b/77671268): We don't set variable_representation_shape_fn here. This // is restricted to Variables, but we need something like this to apply to diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.h b/tensorflow/compiler/jit/kernels/xla_launch_op.h index c6cc0986af..8f8e646f0f 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.h +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.h @@ -53,7 +53,7 @@ class XlaLocalLaunchOp : public OpKernel { // Number of resource variable arguments. 
int num_resource_args_; - perftools::gputools::Platform::Id platform_id_; + se::Platform::Id platform_id_; TF_DISALLOW_COPY_AND_ASSIGN(XlaLocalLaunchOp); }; diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index 6c2782e28e..60458f6f33 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -58,7 +58,7 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, launch_context.PopulateInputs(ctx, result, variables); - perftools::gputools::Stream* stream = + se::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; TF_RET_CHECK(stream); diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index 2c2ac839b3..7beb18c04d 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -51,8 +51,6 @@ limitations under the License. #include "tensorflow/core/util/device_name_utils.h" #include "tensorflow/core/util/stream_executor_util.h" -namespace se = ::perftools::gputools; - namespace tensorflow { // Caches a XlaDeviceAllocator per pair. A diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h index 2f5c53aea8..3ae87308cc 100644 --- a/tensorflow/compiler/jit/xla_device.h +++ b/tensorflow/compiler/jit/xla_device.h @@ -49,20 +49,20 @@ class XlaDevice : public LocalDevice { // retrieved e.g., when lazily creating the XlaCompilationCache device. class Metadata { public: - Metadata(int device_ordinal, perftools::gputools::Platform* platform, + Metadata(int device_ordinal, se::Platform* platform, const DeviceType& device_type); // The index of the device on this host. 
int device_ordinal() const; - perftools::gputools::Platform* platform() const; + se::Platform* platform() const; xla::LocalClient* client() const; const DeviceType& jit_device_type() const; private: const int device_ordinal_; const DeviceType device_type_; - perftools::gputools::Platform* platform_; // Not owned. + se::Platform* platform_; // Not owned. TF_DISALLOW_COPY_AND_ASSIGN(Metadata); }; @@ -85,8 +85,7 @@ class XlaDevice : public LocalDevice { XlaDevice(const SessionOptions& options, const DeviceAttributes& attrs, int device_ordinal, const DeviceType& jit_device_name, - ::perftools::gputools::Platform* platform, - bool transfer_as_literal); + se::Platform* platform, bool transfer_as_literal); ~XlaDevice() override; Allocator* GetAllocator(AllocatorAttributes attr) override; @@ -103,7 +102,7 @@ class XlaDevice : public LocalDevice { Tensor* tensor) override; xla::LocalClient* client() const; - xla::StatusOr<::perftools::gputools::Stream*> GetStream(); + xla::StatusOr GetStream(); // If not already set, create and set GpuDeviceInfo. // Not thread-safe @@ -118,7 +117,7 @@ class XlaDevice : public LocalDevice { DeviceType jit_device_name_; // Memory allocator associated with this device. Allocator* xla_allocator_; // Not owned. - ::perftools::gputools::Platform* platform_; // Not owned. + se::Platform* platform_; // Not owned. // Stream associated with this device. Operations enqueued on this // stream are executed on the device. Operations include data // copying back and forth between CPU and the device, and diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index 43eb164012..bf8c1886a0 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -23,8 +23,6 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/platform/mem.h" -namespace se = ::perftools::gputools; - namespace tensorflow { // The allocator used for Tensors assigned to the XLA device. diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h index ad914a1c23..d7f5f1d208 100644 --- a/tensorflow/compiler/jit/xla_device_context.h +++ b/tensorflow/compiler/jit/xla_device_context.h @@ -45,8 +45,7 @@ class XlaDeviceAllocator : public Allocator { // Helper class for managing data transfers between host and XLA devices. class XlaTransferManager { public: - explicit XlaTransferManager(perftools::gputools::Stream* stream, - xla::LocalClient* client, + explicit XlaTransferManager(se::Stream* stream, xla::LocalClient* client, bool transfer_as_literal); void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, @@ -54,7 +53,7 @@ class XlaTransferManager { void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece tensor_name, Device* device, Tensor* cpu_tensor, StatusCallback done); - perftools::gputools::Stream* stream() const { return stream_; } + se::Stream* stream() const { return stream_; } private: Status TransferLiteralToDevice(const Tensor& host_tensor, @@ -64,7 +63,7 @@ class XlaTransferManager { // Stream obtained from a Device, used to transfer tensors between // CPU and device. - perftools::gputools::Stream* stream_; + se::Stream* stream_; // For the underlying memory allocator and XLA's TransferManager. xla::LocalClient* client_; // Transfer manager, for marshalling data to and from the device. @@ -78,8 +77,8 @@ class XlaTransferManager { // wraps the methods in XlaTransferManager. 
class XlaDeviceContext : public DeviceContext { public: - explicit XlaDeviceContext(perftools::gputools::Stream* stream, - xla::LocalClient* client, bool transfer_as_literal); + explicit XlaDeviceContext(se::Stream* stream, xla::LocalClient* client, + bool transfer_as_literal); void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, Tensor* device_tensor, @@ -87,9 +86,7 @@ class XlaDeviceContext : public DeviceContext { void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece tensor_name, Device* device, Tensor* cpu_tensor, StatusCallback done) override; - perftools::gputools::Stream* stream() const override { - return manager_.stream(); - } + se::Stream* stream() const override { return manager_.stream(); } private: XlaTransferManager manager_; diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 3520501c1a..2a7f04271d 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -32,13 +32,12 @@ limitations under the License. 
#include "tensorflow/core/framework/types.h" #include "tensorflow/core/util/stream_executor_util.h" +namespace tensorflow { namespace { -namespace gpu = perftools::gputools; using xla::ScopedShapedBuffer; using xla::ShapedBuffer; } // anonymous namespace -namespace tensorflow { std::map SnapshotResourceVariables(OpKernelContext* ctx, int num_variables) { std::map snapshot; @@ -57,24 +56,23 @@ std::map SnapshotResourceVariables(OpKernelContext* ctx, return snapshot; } -XlaAllocator::XlaAllocator(const gpu::Platform* platform, Allocator* wrapped) +XlaAllocator::XlaAllocator(const se::Platform* platform, Allocator* wrapped) : xla::DeviceMemoryAllocator(platform), wrapped_(wrapped) {} XlaAllocator::~XlaAllocator() {} -xla::StatusOr XlaAllocator::Allocate( +xla::StatusOr XlaAllocator::Allocate( int device_ordinal, uint64 size, bool retry_on_failure) { void* data = wrapped_->AllocateRaw(Allocator::kAllocatorAlignment, size); if (data == nullptr) { return errors::ResourceExhausted("Out of memory while trying to allocate ", size, " bytes."); } else { - return gpu::DeviceMemoryBase(data, size); + return se::DeviceMemoryBase(data, size); } } -Status XlaAllocator::Deallocate(int device_ordinal, - gpu::DeviceMemoryBase* mem) { +Status XlaAllocator::Deallocate(int device_ordinal, se::DeviceMemoryBase* mem) { wrapped_->DeallocateRaw(mem->opaque()); return Status::OK(); } @@ -102,7 +100,7 @@ ScopedShapedBuffer ExtractSubShapedBuffer( /*target_base_index=*/{}); for (auto& index_to_buffer : shape_tree) { if (!index_to_buffer.first.empty() && index_to_buffer.first[0] == index) { - index_to_buffer.second = gpu::DeviceMemoryBase(nullptr, 0); + index_to_buffer.second = se::DeviceMemoryBase(nullptr, 0); } } return ScopedShapedBuffer(std::move(sub_shaped_buffer), allocator); @@ -149,7 +147,7 @@ void XlaComputationLaunchContext::PopulateInputs( << xla::ShapeUtil::HumanStringWithLayout(on_device_shape) << " not the same as on-host shape " << xla::ShapeUtil::HumanStringWithLayout(shape); - 
gpu::DeviceMemoryBase dmem = XlaTensor::DeviceMemoryFromTensor(*t); + se::DeviceMemoryBase dmem = XlaTensor::DeviceMemoryFromTensor(*t); arg_buffers_[i] = xla::MakeUnique( /*on_host_shape=*/shape, /*on_device_shape=*/shape, client_->platform(), client_->default_device_ordinal()); @@ -162,7 +160,7 @@ void XlaComputationLaunchContext::PopulateInputs( void XlaComputationLaunchContext::PopulateOutputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel, ScopedShapedBuffer output) { - gpu::Stream* stream = + se::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; // Computation output should always be a tuple. @@ -227,7 +225,7 @@ void XlaComputationLaunchContext::PopulateOutputs( const TensorShape& shape = kernel->outputs[i].shape; VLOG(2) << "Retval " << i << " shape " << shape.DebugString(); - gpu::DeviceMemoryBase buffer = output.buffer({output_num}); + se::DeviceMemoryBase buffer = output.buffer({output_num}); if (allocate_xla_tensors_) { Tensor* output_tensor; OP_REQUIRES_OK(ctx, ctx->allocate_output(i, shape, &output_tensor)); @@ -238,7 +236,7 @@ void XlaComputationLaunchContext::PopulateOutputs( } else { Tensor output_tensor = XlaTensorBuffer::MakeTensor( ctx->expected_output_dtype(i), shape, buffer, allocator); - output.set_buffer(gpu::DeviceMemoryBase(nullptr, 0), {output_num}); + output.set_buffer(se::DeviceMemoryBase(nullptr, 0), {output_num}); ctx->set_output(i, output_tensor); } ++output_num; @@ -258,7 +256,7 @@ void XlaComputationLaunchContext::PopulateOutputs( write.input_index >= 0 && write.input_index < ctx->num_inputs(), errors::Internal("Invalid input index for variable write.")); - gpu::DeviceMemoryBase buffer = output.buffer({output_num}); + se::DeviceMemoryBase buffer = output.buffer({output_num}); Var* variable = nullptr; // TODO(b/35625933): tensorflow::Var should contain a PersistentTensor, @@ -288,7 +286,7 @@ void XlaComputationLaunchContext::PopulateOutputs( } else { Tensor output_tensor = 
XlaTensorBuffer::MakeTensor( write.type, write.shape, buffer, allocator); - output.set_buffer(gpu::DeviceMemoryBase(nullptr, 0), {output_num}); + output.set_buffer(se::DeviceMemoryBase(nullptr, 0), {output_num}); *variable->tensor() = output_tensor; } ++output_num; diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h index 26dcaa8a51..8a6ff3b0c7 100644 --- a/tensorflow/compiler/jit/xla_launch_util.h +++ b/tensorflow/compiler/jit/xla_launch_util.h @@ -46,13 +46,11 @@ std::map SnapshotResourceVariables(OpKernelContext* ctx, // see comment on `AllowsAsynchronousDeallocation()`. class XlaAllocator : public xla::DeviceMemoryAllocator { public: - XlaAllocator(const perftools::gputools::Platform* platform, - Allocator* wrapped); + XlaAllocator(const se::Platform* platform, Allocator* wrapped); ~XlaAllocator() override; - xla::StatusOr Allocate( - int device_ordinal, uint64 size, bool retry_on_failure) override; - Status Deallocate(int device_ordinal, - perftools::gputools::DeviceMemoryBase* mem) override; + xla::StatusOr Allocate(int device_ordinal, uint64 size, + bool retry_on_failure) override; + Status Deallocate(int device_ordinal, se::DeviceMemoryBase* mem) override; // The Tensorflow BFC allocator used on GPU allows host-side deallocation // before GPU execution takes place. 
Tensorflow uses the ordering of the main @@ -126,8 +124,7 @@ class XlaTensorBuffer : public TensorBuffer { } static Tensor MakeTensor(DataType dtype, const TensorShape& shape, - perftools::gputools::DeviceMemoryBase buffer, - Allocator* allocator) { + se::DeviceMemoryBase buffer, Allocator* allocator) { size_t expected_size = shape.num_elements() * DataTypeSize(dtype); auto* tensor_buffer = new XlaTensorBuffer(buffer.opaque(), expected_size, buffer.size(), allocator); diff --git a/tensorflow/compiler/jit/xla_tensor.cc b/tensorflow/compiler/jit/xla_tensor.cc index 84b2835c40..ce6456880b 100644 --- a/tensorflow/compiler/jit/xla_tensor.cc +++ b/tensorflow/compiler/jit/xla_tensor.cc @@ -31,16 +31,15 @@ namespace tensorflow { return FromTensor(const_cast(tensor)); } -/*static*/ perftools::gputools::DeviceMemoryBase -XlaTensor::DeviceMemoryFromTensor(const Tensor& tensor) { +/*static*/ se::DeviceMemoryBase XlaTensor::DeviceMemoryFromTensor( + const Tensor& tensor) { const XlaTensor* xla_tensor = FromTensor(&tensor); if (xla_tensor) { CHECK(xla_tensor->has_shaped_buffer()); return xla_tensor->shaped_buffer().root_buffer(); } else { - return perftools::gputools::DeviceMemoryBase( - const_cast(tensor.tensor_data().data()), - tensor.tensor_data().size()); + return se::DeviceMemoryBase(const_cast(tensor.tensor_data().data()), + tensor.tensor_data().size()); } } diff --git a/tensorflow/compiler/jit/xla_tensor.h b/tensorflow/compiler/jit/xla_tensor.h index 2334fd272b..922a918973 100644 --- a/tensorflow/compiler/jit/xla_tensor.h +++ b/tensorflow/compiler/jit/xla_tensor.h @@ -43,8 +43,7 @@ class XlaTensor { // which case the returned value is shaped_buffer()->root_buffer(), or a // normal Tensor in which case the returned value is // {tensor.tensor_data().data(), tensor.tensor_data().size}. 
- static perftools::gputools::DeviceMemoryBase DeviceMemoryFromTensor( - const Tensor& tensor); + static se::DeviceMemoryBase DeviceMemoryFromTensor(const Tensor& tensor); // Assign the internal ShapedBuffer to new memory for the given dtype and // shape. If a ShapedBuffer exists already (has_shaped_buffer() == true), it diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc index 1e8f011b5d..2458f7554a 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc @@ -247,7 +247,7 @@ class FusedConv2DBiasActivationOp : public OpKernel { }; #if GOOGLE_CUDA -namespace dnn = ::perftools::gputools::dnn; +namespace dnn = se::dnn; // A dummy type to group forward convolution autotune results together. struct ConvBiasActivationAutoTuneGroup { diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc index b71ff9cd50..1be97ae3d6 100644 --- a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc +++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc @@ -59,7 +59,7 @@ void AdjustHsvInYiqGPU::operator()(OpKernelContext* ctx, int channel_count, delta_h, scale_s, scale_v, tranformation_matrix.flat().data(), tranformation_matrix.flat().size()); // Call cuBlas C = A * B directly. 
- auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + auto no_transpose = se::blas::Transpose::kNoTranspose; auto a_ptr = AsDeviceMemory(input->flat().data(), input->flat().size()); auto b_ptr = AsDeviceMemory(tranformation_matrix.flat().data(), diff --git a/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc b/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc index 8dca90a1e3..ed22ee667f 100644 --- a/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc +++ b/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc @@ -73,7 +73,7 @@ limitations under the License. */ template -using StatusOr = perftools::gputools::port::StatusOr; +using StatusOr = se::port::StatusOr; using CPUDevice = Eigen::ThreadPoolDevice; using GPUDevice = Eigen::GpuDevice; diff --git a/tensorflow/contrib/mpi_collectives/mpi_ops.cc b/tensorflow/contrib/mpi_collectives/mpi_ops.cc index a051ab0004..475297ca92 100644 --- a/tensorflow/contrib/mpi_collectives/mpi_ops.cc +++ b/tensorflow/contrib/mpi_collectives/mpi_ops.cc @@ -74,7 +74,7 @@ limitations under the License. */ template -using StatusOr = perftools::gputools::port::StatusOr; +using StatusOr = se::port::StatusOr; using CPUDevice = Eigen::ThreadPoolDevice; using GPUDevice = Eigen::GpuDevice; diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.cc b/tensorflow/contrib/nccl/kernels/nccl_manager.cc index b9b482a698..b1cb89391c 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_manager.cc @@ -24,7 +24,7 @@ limitations under the License. namespace tensorflow { -using ::perftools::gputools::cuda::ScopedActivateExecutorContext; +using se::cuda::ScopedActivateExecutorContext; // Contains data for a single stream used for nccl communication; this includes // a background thread that calls NcclManager::LoopKernelLaunches. 
@@ -37,11 +37,11 @@ struct NcclManager::NcclStream { cv.notify_all(); } - perftools::gputools::StreamExecutor* executor = nullptr; + se::StreamExecutor* executor = nullptr; // The stream on which to run the nccl collective. // This is a different stream than the tensorflow compute stream. - std::unique_ptr stream; + std::unique_ptr stream; // See NcclManager::LoopKernelLaunches for information on these. std::unique_ptr thread; @@ -95,9 +95,8 @@ ncclDataType_t ToNcclType(DataType t) { // A participant in a Collective. See below. struct NcclManager::Participant { Participant(const Tensor* in_t, Tensor* out_t, EventMgr* event_mgr, - perftools::gputools::Stream* tensor_stream, - perftools::gputools::StreamExecutor* executor, int gpu_device_id, - NcclManager::DoneCallback done_callback) + se::Stream* tensor_stream, se::StreamExecutor* executor, + int gpu_device_id, NcclManager::DoneCallback done_callback) : in_t(in_t), out_t(out_t), event_mgr(event_mgr), @@ -121,11 +120,11 @@ struct NcclManager::Participant { EventMgr* const event_mgr; // Owned by the caller, who must keep it live until is called. - perftools::gputools::Stream* const tensor_stream; + se::Stream* const tensor_stream; // Matches the executor in CommunicatorMember::stream. Expected to be live for // process lifetime. 
- perftools::gputools::StreamExecutor* const executor = nullptr; + se::StreamExecutor* const executor = nullptr; const int gpu_device_id; @@ -245,7 +244,7 @@ NcclManager::Communicator* NcclManager::GetCommunicator( if (nccl_stream == nullptr) { nccl_stream = new NcclStream(); nccl_stream->executor = executor; - nccl_stream->stream.reset(new perftools::gputools::Stream(executor)); + nccl_stream->stream.reset(new se::Stream(executor)); nccl_stream->stream->Init(); streams.emplace_back(nccl_stream); @@ -300,10 +299,10 @@ NcclManager::Communicator* NcclManager::GetCommunicator( void NcclManager::AddToAllReduce(int num_devices, const string& key, ncclRedOp_t reduction_op, - perftools::gputools::StreamExecutor* executor, + se::StreamExecutor* executor, int gpu_device_id, EventMgr* event_mgr, - perftools::gputools::Stream* tensor_stream, - const Tensor* in_t, Tensor* out_t, + se::Stream* tensor_stream, const Tensor* in_t, + Tensor* out_t, const DoneCallback& done_callback) { std::unique_ptr participant( new Participant(in_t, out_t, event_mgr, tensor_stream, executor, @@ -312,11 +311,12 @@ void NcclManager::AddToAllReduce(int num_devices, const string& key, kAllReduce, reduction_op); } -void NcclManager::AddBroadcastSend( - int num_devices, const string& key, - perftools::gputools::StreamExecutor* executor, int gpu_device_id, - EventMgr* event_mgr, perftools::gputools::Stream* tensor_stream, - const Tensor* in_t, DoneCallback done_callback) { +void NcclManager::AddBroadcastSend(int num_devices, const string& key, + se::StreamExecutor* executor, + int gpu_device_id, EventMgr* event_mgr, + se::Stream* tensor_stream, + const Tensor* in_t, + DoneCallback done_callback) { std::unique_ptr participant( new Participant(in_t, nullptr /* out_t */, event_mgr, tensor_stream, executor, gpu_device_id, std::move(done_callback))); @@ -325,11 +325,11 @@ void NcclManager::AddBroadcastSend( kBroadcast, ncclSum /* unused */); } -void NcclManager::AddBroadcastRecv( - int num_devices, const 
string& key, - perftools::gputools::StreamExecutor* executor, int gpu_device_id, - EventMgr* event_mgr, perftools::gputools::Stream* tensor_stream, - Tensor* out_t, DoneCallback done_callback) { +void NcclManager::AddBroadcastRecv(int num_devices, const string& key, + se::StreamExecutor* executor, + int gpu_device_id, EventMgr* event_mgr, + se::Stream* tensor_stream, Tensor* out_t, + DoneCallback done_callback) { std::unique_ptr participant( new Participant(nullptr /* in_t */, out_t, event_mgr, tensor_stream, executor, gpu_device_id, std::move(done_callback))); @@ -339,9 +339,8 @@ void NcclManager::AddBroadcastRecv( void NcclManager::AddReduceSend(int num_devices, const string& key, ncclRedOp_t reduction_op, - perftools::gputools::StreamExecutor* executor, - int gpu_device_id, EventMgr* event_mgr, - perftools::gputools::Stream* tensor_stream, + se::StreamExecutor* executor, int gpu_device_id, + EventMgr* event_mgr, se::Stream* tensor_stream, const Tensor* in_t, DoneCallback done_callback) { std::unique_ptr participant( @@ -353,9 +352,8 @@ void NcclManager::AddReduceSend(int num_devices, const string& key, void NcclManager::AddReduceRecv(int num_devices, const string& key, ncclRedOp_t reduction_op, - perftools::gputools::StreamExecutor* executor, - int gpu_device_id, EventMgr* event_mgr, - perftools::gputools::Stream* tensor_stream, + se::StreamExecutor* executor, int gpu_device_id, + EventMgr* event_mgr, se::Stream* tensor_stream, const Tensor* in_t, Tensor* out_t, DoneCallback done_callback) { std::unique_ptr participant( @@ -444,7 +442,7 @@ void NcclManager::RunCollective(const string& key, Collective* collective) { } void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { - perftools::gputools::Stream* comm_stream = nccl_stream->stream.get(); + se::Stream* comm_stream = nccl_stream->stream.get(); ScopedActivateExecutorContext scoped_context(nccl_stream->executor); const cudaStream_t* cu_stream = reinterpret_cast( 
comm_stream->implementation()->CudaStreamMemberHack()); diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.h b/tensorflow/contrib/nccl/kernels/nccl_manager.h index 6ff8cea84e..57a96c5d33 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager.h +++ b/tensorflow/contrib/nccl/kernels/nccl_manager.h @@ -55,41 +55,34 @@ class NcclManager { // is also the stream that will use the produced data; is // not called until the next kernel launched on would see the data. void AddToAllReduce(int num_devices, const string& key, - ncclRedOp_t reduction_op, - perftools::gputools::StreamExecutor* executor, + ncclRedOp_t reduction_op, se::StreamExecutor* executor, int gpu_device_id, EventMgr* event_mgr, - perftools::gputools::Stream* tensor_stream, - const Tensor* in_t, Tensor* out_t, - const DoneCallback& done_callback); + se::Stream* tensor_stream, const Tensor* in_t, + Tensor* out_t, const DoneCallback& done_callback); // AddBroadcastSend and AddBroadcastRecv combine to sent data from one sender // to all receivers. void AddBroadcastSend(int num_devices, const string& key, - perftools::gputools::StreamExecutor* executor, - int gpu_device_id, EventMgr* event_mgr, - perftools::gputools::Stream* tensor_stream, + se::StreamExecutor* executor, int gpu_device_id, + EventMgr* event_mgr, se::Stream* tensor_stream, const Tensor* in_t, DoneCallback done_callback); void AddBroadcastRecv(int num_devices, const string& key, - perftools::gputools::StreamExecutor* executor, - int gpu_device_id, EventMgr* event_mgr, - perftools::gputools::Stream* tensor_stream, + se::StreamExecutor* executor, int gpu_device_id, + EventMgr* event_mgr, se::Stream* tensor_stream, Tensor* out_t, DoneCallback done_callback); // AddReduceSend and AddReduceRecv combine to sent data from all senders // to one receiver. 
void AddReduceSend(int num_devices, const string& key, - ncclRedOp_t reduction_op, - perftools::gputools::StreamExecutor* executor, + ncclRedOp_t reduction_op, se::StreamExecutor* executor, int gpu_device_id, EventMgr* event_mgr, - perftools::gputools::Stream* tensor_stream, - const Tensor* in_t, DoneCallback done_callback); + se::Stream* tensor_stream, const Tensor* in_t, + DoneCallback done_callback); void AddReduceRecv(int num_devices, const string& key, - ncclRedOp_t reduction_op, - perftools::gputools::StreamExecutor* executor, + ncclRedOp_t reduction_op, se::StreamExecutor* executor, int gpu_device_id, EventMgr* event_mgr, - perftools::gputools::Stream* tensor_stream, - const Tensor* in_t, Tensor* out_t, - DoneCallback done_callback); + se::Stream* tensor_stream, const Tensor* in_t, + Tensor* out_t, DoneCallback done_callback); private: enum CollectiveType { @@ -123,8 +116,7 @@ class NcclManager { // Maps a device to the communication streams that make up its collective. // This is used to share the stream across different communicators that // include the same device. 
- std::map>> + std::map>> device_to_comm_streams_ GUARDED_BY(mu_); std::vector> communicators_; diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc index 06ca65e33a..4d8d922cb4 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc @@ -175,11 +175,9 @@ class NcclManagerTest : public ::testing::Test { nullptr /* step_resource_manager */); } - static perftools::gputools::DeviceMemory AsDeviceMemory( - const Scalar* cuda_memory) { - perftools::gputools::DeviceMemoryBase wrapped( - const_cast(cuda_memory)); - perftools::gputools::DeviceMemory typed(wrapped); + static se::DeviceMemory AsDeviceMemory(const Scalar* cuda_memory) { + se::DeviceMemoryBase wrapped(const_cast(cuda_memory)); + se::DeviceMemory typed(wrapped); return typed; } diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.cc b/tensorflow/contrib/rnn/kernels/blas_gemm.cc index 03006dab32..45d22b739b 100644 --- a/tensorflow/contrib/rnn/kernels/blas_gemm.cc +++ b/tensorflow/contrib/rnn/kernels/blas_gemm.cc @@ -26,9 +26,9 @@ namespace tensorflow { #if GOOGLE_CUDA namespace { template -perftools::gputools::DeviceMemory AsDeviceMemory(const T* cuda_memory) { - perftools::gputools::DeviceMemoryBase wrapped(const_cast(cuda_memory)); - perftools::gputools::DeviceMemory typed(wrapped); +se::DeviceMemory AsDeviceMemory(const T* cuda_memory) { + se::DeviceMemoryBase wrapped(const_cast(cuda_memory)); + se::DeviceMemory typed(wrapped); return typed; } } // namespace @@ -41,9 +41,8 @@ void TensorCuBlasGemm::operator()(OpKernelContext* ctx, bool transa, T alpha, const T* a, int lda, const T* b, int ldb, T beta, T* c, int ldc) { #if GOOGLE_CUDA - perftools::gputools::blas::Transpose trans[] = { - perftools::gputools::blas::Transpose::kNoTranspose, - perftools::gputools::blas::Transpose::kTranspose}; + se::blas::Transpose trans[] = {se::blas::Transpose::kNoTranspose, + 
se::blas::Transpose::kTranspose}; auto a_ptr = AsDeviceMemory(a); auto b_ptr = AsDeviceMemory(b); diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index b32371b642..53ba7badca 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -25,7 +25,6 @@ limitations under the License. namespace tensorflow { static ::tensorflow::tensorrt::Logger logger; -namespace gpu = ::perftools::gputools; using IRuntime = nvinfer1::IRuntime; using Dims = nvinfer1::Dims; diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h index c2c0b020c7..ad142e9982 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h @@ -29,8 +29,6 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/config.pb.h" -namespace gpu = ::perftools::gputools; - namespace tensorflow { // A GPU memory allocator that implements a 'best-fit with coalescing' @@ -52,7 +50,7 @@ class GPUBFCAllocator : public BFCAllocator { class GPUMemAllocator : public SubAllocator { public: // Note: stream_exec cannot be null. 
- explicit GPUMemAllocator(perftools::gputools::StreamExecutor* stream_exec) + explicit GPUMemAllocator(se::StreamExecutor* stream_exec) : stream_exec_(stream_exec) { CHECK(stream_exec_ != nullptr); } @@ -68,13 +66,13 @@ class GPUMemAllocator : public SubAllocator { void Free(void* ptr, size_t num_bytes) override { if (ptr != nullptr) { - gpu::DeviceMemoryBase gpu_ptr(ptr); + se::DeviceMemoryBase gpu_ptr(ptr); stream_exec_->Deallocate(&gpu_ptr); } } private: - perftools::gputools::StreamExecutor* stream_exec_; // not owned, non-null + se::StreamExecutor* stream_exec_; // not owned, non-null TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator); }; diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h index 208697361d..5043fac797 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h @@ -44,7 +44,7 @@ class GPUcudaMallocAllocator : public VisitableAllocator { private: VisitableAllocator* base_allocator_ = nullptr; // owned - perftools::gputools::StreamExecutor* stream_exec_; // Not owned. + se::StreamExecutor* stream_exec_; // Not owned. 
TF_DISALLOW_COPY_AND_ASSIGN(GPUcudaMallocAllocator); }; diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc index b0ca7e3109..4ff5fab866 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc @@ -40,8 +40,7 @@ int64* NewMask(int64 word) { int64* before_mask = NewMask(0xabababababababab); int64* after_mask = NewMask(0xcdcdcdcdcdcdcdcd); -bool CheckMask(perftools::gputools::StreamExecutor* exec, void* ptr, - int64* mask) { +bool CheckMask(se::StreamExecutor* exec, void* ptr, int64* mask) { gpu::DeviceMemory gpu_ptr{gpu::DeviceMemoryBase{ptr, MASK_BYTES}}; int64 tmp[MASK_WORDS]; @@ -62,8 +61,7 @@ bool CheckMask(perftools::gputools::StreamExecutor* exec, void* ptr, return ok; } -void InitMask(perftools::gputools::StreamExecutor* exec, void* ptr, - int64* mask) { +void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) { gpu::DeviceMemory gpu_ptr{gpu::DeviceMemoryBase{ptr, MASK_BYTES}}; if (!exec->SynchronousMemcpy(&gpu_ptr, mask, MASK_BYTES)) { LOG(FATAL) << "Could not copy debug mask"; diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h index adce3a8436..c49ec2a566 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h @@ -55,7 +55,7 @@ class GPUDebugAllocator : public VisitableAllocator { private: VisitableAllocator* base_allocator_ = nullptr; // owned - perftools::gputools::StreamExecutor* stream_exec_; // Not owned. + se::StreamExecutor* stream_exec_; // Not owned. TF_DISALLOW_COPY_AND_ASSIGN(GPUDebugAllocator); }; @@ -81,7 +81,7 @@ class GPUNanResetAllocator : public VisitableAllocator { private: VisitableAllocator* base_allocator_ = nullptr; // owned - perftools::gputools::StreamExecutor* stream_exec_; // Not owned. 
+ se::StreamExecutor* stream_exec_; // Not owned. TF_DISALLOW_COPY_AND_ASSIGN(GPUNanResetAllocator); }; diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 0b9e8f9cc2..f7248ca79d 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -297,9 +297,8 @@ Status BaseGPUDevice::Init(const SessionOptions& options) { } scratch_.push_back(static_cast(scratch_buffer)); - perftools::gputools::DeviceMemory mem( - perftools::gputools::DeviceMemoryBase(scratch_buffer, - scratch_buffer_size)); + se::DeviceMemory mem( + se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size)); bool ok = executor_->SynchronousMemZero( &mem, Eigen::kCudaScratchSize + sizeof(unsigned int)); diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc index af6a59a85d..4898448476 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc @@ -18,11 +18,9 @@ limitations under the License. #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/protobuf/config.pb.h" -namespace gpu = ::perftools::gputools; - namespace tensorflow { -EventMgr::EventMgr(gpu::StreamExecutor* se, const GPUOptions& gpu_options) +EventMgr::EventMgr(se::StreamExecutor* se, const GPUOptions& gpu_options) : exec_(se), deferred_bytes_threshold_(gpu_options.deferred_deletion_bytes() ? gpu_options.deferred_deletion_bytes() @@ -94,7 +92,7 @@ void EventMgr::StopPollingLoop() { } } -void EventMgr::ThenDeleteTensors(perftools::gputools::Stream* stream, +void EventMgr::ThenDeleteTensors(se::Stream* stream, const TensorReferenceVector& tensors) { mutex_lock l(mu_); // TODO(jeff): We currently keep one accumulated_tensors_ object. 
@@ -152,16 +150,16 @@ void EventMgr::PollLoop() { polling_stopped_->Notify(); } -void EventMgr::QueueInUse(gpu::Stream* stream, InUse iu) { +void EventMgr::QueueInUse(se::Stream* stream, InUse iu) { VLOG(2) << "QueueInUse free_events_ " << free_events_.size() << " used_events_ " << used_events_.size(); // Events are created on demand, and repeatedly reused. There is no // limit placed here on the number of allocated Events. if (free_events_.empty()) { - free_events_.push_back(new gpu::Event(exec_)); + free_events_.push_back(new se::Event(exec_)); free_events_.back()->Init(); } - gpu::Event* e = free_events_.back(); + se::Event* e = free_events_.back(); free_events_.pop_back(); stream->ThenRecordEvent(e); iu.event = e; @@ -199,18 +197,18 @@ void EventMgr::PollEvents(bool is_dedicated_poller, // the first non-complete record that is still pending. for (auto& iu : used_events_) { if (iu.event == nullptr) continue; - gpu::Event::Status s = iu.event->PollForStatus(); + se::Event::Status s = iu.event->PollForStatus(); switch (s) { - case gpu::Event::Status::kUnknown: - case gpu::Event::Status::kError: + case se::Event::Status::kUnknown: + case se::Event::Status::kError: // We don't expect to see these. Someday maybe propagate // a Status error, but for now fail hard. LOG(FATAL) << "Unexpected Event status: " << static_cast(s); break; - case gpu::Event::Status::kPending: + case se::Event::Status::kPending: if (!is_dedicated_poller) return; // quit processing queue break; - case gpu::Event::Status::kComplete: + case se::Event::Status::kComplete: // Make a copy of the InUse record so we can free it after releasing // the lock to_free->push_back(iu); diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h index fd5f50ca4e..b26f88a201 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h +++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h @@ -44,14 +44,13 @@ class GPUOptions; // Events are recorded. 
class EventMgr { public: - EventMgr(perftools::gputools::StreamExecutor* se, - const GPUOptions& gpu_options); + EventMgr(se::StreamExecutor* se, const GPUOptions& gpu_options); ~EventMgr(); // Releases the references on the elements of "tensors" as soon as // all events currently enqueued on "stream" have completed. - void ThenDeleteTensors(perftools::gputools::Stream* stream, + void ThenDeleteTensors(se::Stream* stream, const TensorReferenceVector& tensors); struct BufRec { @@ -65,8 +64,7 @@ class EventMgr { // Takes ownership of *bufrec.buf and calls bufrec.alloc->DeallocateRaw() // on it as soon as all events currently enqueued on *stream have completed. - inline void ThenDeleteBuffer(perftools::gputools::Stream* stream, - BufRec bufrec) { + inline void ThenDeleteBuffer(se::Stream* stream, BufRec bufrec) { ToFreeVector to_free; { mutex_lock l(mu_); @@ -76,8 +74,7 @@ class EventMgr { FreeMemory(to_free); } - inline void ThenExecute(perftools::gputools::Stream* stream, - std::function func) { + inline void ThenExecute(se::Stream* stream, std::function func) { ToFreeVector to_free; { mutex_lock l(mu_); @@ -89,7 +86,7 @@ class EventMgr { private: friend class TEST_EventMgrHelper; - perftools::gputools::StreamExecutor* const exec_; + se::StreamExecutor* const exec_; const int64 deferred_bytes_threshold_; const int32 polling_active_delay_usecs_; mutex mu_; @@ -98,7 +95,7 @@ class EventMgr { void FlushAccumulatedTensors() EXCLUSIVE_LOCKS_REQUIRED(mu_); struct InUse { - perftools::gputools::Event* event; + se::Event* event; TensorReferenceVector* mem; BufRec bufrec; std::function func; @@ -130,22 +127,21 @@ class EventMgr { // Stream-enqueue an unused Event and save with it a collection of // Tensors and/or a BufRec to be deleted only after the Event // records. 
- void QueueInUse(perftools::gputools::Stream* stream, InUse in_use) + void QueueInUse(se::Stream* stream, InUse in_use) EXCLUSIVE_LOCKS_REQUIRED(mu_); - void QueueTensors(perftools::gputools::Stream* stream, - TensorReferenceVector* tensors) + void QueueTensors(se::Stream* stream, TensorReferenceVector* tensors) EXCLUSIVE_LOCKS_REQUIRED(mu_) { QueueInUse(stream, {nullptr, tensors, BufRec(), nullptr}); } - void QueueBuffer(perftools::gputools::Stream* stream, BufRec bufrec) + void QueueBuffer(se::Stream* stream, BufRec bufrec) EXCLUSIVE_LOCKS_REQUIRED(mu_) { QueueInUse(stream, {nullptr, nullptr, bufrec, nullptr}); } - void QueueFunc(perftools::gputools::Stream* stream, - std::function func) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + void QueueFunc(se::Stream* stream, std::function func) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { QueueInUse(stream, {nullptr, nullptr, BufRec(), std::move(func)}); } @@ -166,10 +162,10 @@ class EventMgr { void StopPollingLoop(); // A stack of unused events - std::vector free_events_ GUARDED_BY(mu_); + std::vector free_events_ GUARDED_BY(mu_); // Buffered list of tensors waiting to have an event queued for deletion - perftools::gputools::Stream* accumulated_stream_ GUARDED_BY(mu_); + se::Stream* accumulated_stream_ GUARDED_BY(mu_); TensorReferenceVector* accumulated_tensors_ GUARDED_BY(mu_); // Sum of the TotalBytes() of the tensors in "accumulated_tensors_" int64 accumulated_tensor_bytes_ GUARDED_BY(mu_); diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc b/tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc index 3ad0b0eb85..1d4ad957b9 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc @@ -23,8 +23,6 @@ limitations under the License. 
#include "tensorflow/core/platform/test.h" #include "tensorflow/core/protobuf/config.pb.h" -namespace gpu = ::perftools::gputools; - namespace tensorflow { class TEST_EventMgrHelper { @@ -47,8 +45,7 @@ class TEST_EventMgrHelper { return em_->free_events_.size(); } - void QueueTensors(perftools::gputools::Stream* stream, - TensorReferenceVector* tensors) { + void QueueTensors(se::Stream* stream, TensorReferenceVector* tensors) { mutex_lock l(em_->mu_); em_->QueueTensors(stream, tensors); } @@ -121,7 +118,7 @@ TEST(EventMgr, DelayedPolling) { TEST_EventMgrHelper th(&em); EXPECT_EQ(0, th.queue_size()); TensorReferenceVector* v = nullptr; - std::unique_ptr stream(new gpu::Stream(stream_exec)); + std::unique_ptr stream(new se::Stream(stream_exec)); CHECK(stream.get()); stream->Init(); for (int i = 0; i < 5; ++i) { @@ -153,7 +150,7 @@ TEST(EventMgr, FlushLargeTensorImmediately) { EventMgr em(stream_exec, GPUOptions()); TEST_EventMgrHelper th(&em); EXPECT_EQ(0, live_tensor_bytes); - std::unique_ptr stream(new gpu::Stream(stream_exec)); + std::unique_ptr stream(new se::Stream(stream_exec)); CHECK(stream.get()); stream->Init(); for (int i = 0; i < 5; ++i) { @@ -170,7 +167,7 @@ TEST(EventMgr, ManySmallTensorsFlushedImmediately) { EventMgr em(stream_exec, GPUOptions()); TEST_EventMgrHelper th(&em); EXPECT_EQ(0, live_tensor_bytes); - std::unique_ptr stream(new gpu::Stream(stream_exec)); + std::unique_ptr stream(new se::Stream(stream_exec)); CHECK(stream.get()); stream->Init(); for (int i = 0; i < 5; ++i) { @@ -189,8 +186,8 @@ TEST(EventMgr, StreamSwitchingFlushesImmediately) { EventMgr em(stream_exec, GPUOptions()); TEST_EventMgrHelper th(&em); EXPECT_EQ(0, live_tensor_bytes); - std::unique_ptr stream1(new gpu::Stream(stream_exec)); - std::unique_ptr stream2(new gpu::Stream(stream_exec)); + std::unique_ptr stream1(new se::Stream(stream_exec)); + std::unique_ptr stream2(new se::Stream(stream_exec)); stream1->Init(); stream2->Init(); TensorReferenceVector v1; @@ -211,7 +208,7 @@ 
TEST(EventMgr, ManySmallTensorsSeparateCallsFlushed) { EventMgr em(stream_exec, GPUOptions()); TEST_EventMgrHelper th(&em); EXPECT_EQ(0, live_tensor_bytes); - std::unique_ptr stream(new gpu::Stream(stream_exec)); + std::unique_ptr stream(new se::Stream(stream_exec)); CHECK(stream.get()); stream->Init(); for (int i = 0; i < 5; ++i) { @@ -234,7 +231,7 @@ TEST(EventMgr, NonEmptyShutdown) { TEST_EventMgrHelper th(&em); EXPECT_EQ(0, th.queue_size()); EXPECT_EQ(0, th.free_size()); - std::unique_ptr stream(new gpu::Stream(stream_exec)); + std::unique_ptr stream(new se::Stream(stream_exec)); CHECK(stream.get()); stream->Init(); for (int i = 0; i < 5; ++i) { diff --git a/tensorflow/core/common_runtime/gpu/gpu_init.cc b/tensorflow/core/common_runtime/gpu/gpu_init.cc index aa23e3cc61..ff96891a2a 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_init.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_init.cc @@ -26,12 +26,10 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/stream_executor_util.h" -namespace gpu = ::perftools::gputools; - namespace tensorflow { Status ValidateGPUMachineManager() { - auto result = gpu::MultiPlatformManager::PlatformWithName("CUDA"); + auto result = se::MultiPlatformManager::PlatformWithName("CUDA"); if (!result.ok()) { return StreamExecutorUtil::ConvertStatus(result.status()); } @@ -39,8 +37,8 @@ Status ValidateGPUMachineManager() { return Status::OK(); } -gpu::Platform* GPUMachineManager() { - auto result = gpu::MultiPlatformManager::PlatformWithName("CUDA"); +se::Platform* GPUMachineManager() { + auto result = se::MultiPlatformManager::PlatformWithName("CUDA"); if (!result.ok()) { LOG(FATAL) << "Could not find Platform with name CUDA"; return nullptr; diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.cc b/tensorflow/core/common_runtime/gpu/gpu_util.cc index 5214ceaae5..7ba853fa51 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_util.cc +++ 
b/tensorflow/core/common_runtime/gpu/gpu_util.cc @@ -55,19 +55,15 @@ limitations under the License. const tensorflow::int64 FLAGS_brain_gpu_util_debug_string_maxlen = 128; extern bool FLAGS_brain_gpu_record_mem_types; -using perftools::gputools::DeviceMemoryBase; -using perftools::gputools::Stream; - namespace tensorflow { -// TODO(b/77980417): Remove this and use the regular tensorflow::se alias once -// that's available. -namespace gpu = ::stream_executor; +using se::DeviceMemoryBase; +using se::Stream; Status PrepareCopy(Device* device, const DeviceContext* ctx, const Tensor& src, const Tensor* dst, const DeviceBase::GpuDeviceInfo** dev_info, - gpu::Stream** stream) { + se::Stream** stream) { if (device == nullptr) { return errors::Internal("Unexpected null device."); } @@ -122,7 +118,7 @@ void GPUUtil::SetProtoFromGPU(const Tensor& tensor, Device* dev, StatusCallback done) { VLOG(1) << "SetProtoFromGPU device_context " << device_context; const DeviceBase::GpuDeviceInfo* dev_info = nullptr; - gpu::Stream* send_stream = nullptr; + se::Stream* send_stream = nullptr; Status s = PrepareCopy(dev, device_context, tensor, nullptr, &dev_info, &send_stream); if (!s.ok()) { @@ -197,7 +193,7 @@ void GPUUtil::DeviceToDeviceCopy(DeviceContext* send_dev_context, const Tensor* input, Tensor* output, StatusCallback done) { const DeviceBase::GpuDeviceInfo* dev_info = nullptr; - gpu::Stream* send_stream = nullptr; + se::Stream* send_stream = nullptr; Status s = PrepareCopy(src, send_dev_context, *input, output, &dev_info, &send_stream); if (!s.ok()) { @@ -264,7 +260,7 @@ void GPUUtil::CopyGPUTensorToCPU(Device* gpu_device, StatusCallback done) { VLOG(1) << "CopyGPUTensorToCPU"; const DeviceBase::GpuDeviceInfo* dev_info = nullptr; - gpu::Stream* send_stream = nullptr; + se::Stream* send_stream = nullptr; Status s = PrepareCopy(gpu_device, device_context, *gpu_tensor, cpu_tensor, &dev_info, &send_stream); if (!s.ok()) { @@ -309,7 +305,7 @@ void GPUUtil::CopyCPUTensorToGPU(const 
Tensor* cpu_tensor, StatusCallback done) { VLOG(1) << "CopyCPUTensorToGPU"; const DeviceBase::GpuDeviceInfo* dev_info = nullptr; - gpu::Stream* recv_stream = nullptr; + se::Stream* recv_stream = nullptr; Status s = PrepareCopy(gpu_device, device_context, *cpu_tensor, gpu_tensor, &dev_info, &recv_stream); if (!s.ok()) { @@ -432,7 +428,7 @@ void GPUUtil::CopyGPUTensorToSameGPU(Device* gpu_device, StatusCallback done) { VLOG(1) << "CopyGPUTensorToSameGPU"; const DeviceBase::GpuDeviceInfo* dev_info = nullptr; - gpu::Stream* send_stream = nullptr; + se::Stream* send_stream = nullptr; Status s = PrepareCopy(gpu_device, device_context, *src_gpu_tensor, dst_gpu_tensor, &dev_info, &send_stream); if (!s.ok()) { diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.h b/tensorflow/core/common_runtime/gpu/gpu_util.h index 337dc89895..0c69a17eaa 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_util.h +++ b/tensorflow/core/common_runtime/gpu/gpu_util.h @@ -74,10 +74,9 @@ class GPUUtil { // NOTE: will be removed soon, see StreamExecutorUtil::AsDeviceMemory // instead. template - static perftools::gputools::DeviceMemory AsDeviceMemory(const Tensor& t) { + static se::DeviceMemory AsDeviceMemory(const Tensor& t) { T* ptr = reinterpret_cast(const_cast(DMAHelper::base(&t))); - return perftools::gputools::DeviceMemory( - perftools::gputools::DeviceMemoryBase(ptr, t.TotalBytes())); + return se::DeviceMemory(se::DeviceMemoryBase(ptr, t.TotalBytes())); } // Computes a checksum over the contents of "tensor", which is allocated diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator.h b/tensorflow/core/common_runtime/gpu/pool_allocator.h index 91ce830df8..310158aba1 100644 --- a/tensorflow/core/common_runtime/gpu/pool_allocator.h +++ b/tensorflow/core/common_runtime/gpu/pool_allocator.h @@ -181,7 +181,7 @@ class BasicCPUAllocator : public SubAllocator { class CUDAHostAllocator : public SubAllocator { public: // Note: stream_exec cannot be null. 
- explicit CUDAHostAllocator(perftools::gputools::StreamExecutor* stream_exec) + explicit CUDAHostAllocator(se::StreamExecutor* stream_exec) : stream_exec_(stream_exec) { CHECK(stream_exec_ != nullptr); } @@ -206,7 +206,7 @@ class CUDAHostAllocator : public SubAllocator { } private: - perftools::gputools::StreamExecutor* stream_exec_; // not owned, non-null + se::StreamExecutor* stream_exec_; // not owned, non-null TF_DISALLOW_COPY_AND_ASSIGN(CUDAHostAllocator); }; diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc index 85555955e3..a4c8d5fe86 100644 --- a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc @@ -20,18 +20,16 @@ limitations under the License. #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/test.h" -namespace gpu = ::perftools::gputools; - namespace tensorflow { namespace { TEST(PoolAllocatorTest, ZeroSizeBuffers) { - gpu::Platform* platform = - gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); + se::Platform* platform = + se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); PoolAllocator pool( 2 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( - platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0)) + platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) .ValueOrDie()), new NoopRounder, "pool"); @@ -44,12 +42,12 @@ TEST(PoolAllocatorTest, ZeroSizeBuffers) { } TEST(PoolAllocatorTest, ZeroSizePool) { - gpu::Platform* platform = - gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); + se::Platform* platform = + se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); PoolAllocator pool( 0 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( - platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0)) + platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) 
.ValueOrDie()), new NoopRounder, "pool"); @@ -77,12 +75,12 @@ TEST(PoolAllocatorTest, ZeroSizePool) { } TEST(PoolAllocatorTest, Alignment) { - gpu::Platform* platform = - gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); + se::Platform* platform = + se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); PoolAllocator pool( 0 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( - platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0)) + platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) .ValueOrDie()), new NoopRounder, "pool"); for (int i = 0; i < 16; ++i) { @@ -123,12 +121,12 @@ TEST(PoolAllocatorTest, AutoResize) { } TEST(PoolAllocatorTest, CudaHostAllocator) { - gpu::Platform* platform = - gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); + se::Platform* platform = + se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); PoolAllocator pool( 2 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( - platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0)) + platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) .ValueOrDie()), new NoopRounder, "pool"); @@ -200,12 +198,12 @@ TEST(PoolAllocatorTest, Pow2Rounder) { } TEST(PoolAllocatorTest, Name) { - gpu::Platform* platform = - gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); + se::Platform* platform = + se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); PoolAllocator pool( 2 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( - platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0)) + platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) .ValueOrDie()), new NoopRounder, "pool"); EXPECT_EQ("pool", pool.Name()); diff --git a/tensorflow/core/common_runtime/gpu_device_context.h b/tensorflow/core/common_runtime/gpu_device_context.h index 38a18cd087..a1ad2c2277 100644 --- a/tensorflow/core/common_runtime/gpu_device_context.h +++ 
b/tensorflow/core/common_runtime/gpu_device_context.h @@ -63,8 +63,8 @@ class GPUDeviceContext : public DeviceContext { Device* device, Tensor* cpu_tensor, StatusCallback done) override; - void MaintainLifetimeOnStream( - const Tensor* t, perftools::gputools::Stream* stream) const override {} + void MaintainLifetimeOnStream(const Tensor* t, + se::Stream* stream) const override {} private: int stream_id_; diff --git a/tensorflow/core/grappler/devices.cc b/tensorflow/core/grappler/devices.cc index 2be894a08b..3268697671 100644 --- a/tensorflow/core/grappler/devices.cc +++ b/tensorflow/core/grappler/devices.cc @@ -31,15 +31,14 @@ int GetNumAvailableGPUs() { int num_eligible_gpus = 0; #if GOOGLE_CUDA if (ValidateGPUMachineManager().ok()) { - perftools::gputools::Platform* gpu_manager = GPUMachineManager(); + se::Platform* gpu_manager = GPUMachineManager(); if (gpu_manager != nullptr) { int num_gpus = gpu_manager->VisibleDeviceCount(); for (int i = 0; i < num_gpus; i++) { auto exec_status = gpu_manager->ExecutorForDevice(i); if (exec_status.ok()) { - perftools::gputools::StreamExecutor* se = exec_status.ValueOrDie(); - const perftools::gputools::DeviceDescription& desc = - se->GetDeviceDescription(); + se::StreamExecutor* se = exec_status.ValueOrDie(); + const se::DeviceDescription& desc = se->GetDeviceDescription(); int min_gpu_core_count = 8; if (desc.core_count() >= min_gpu_core_count) { num_eligible_gpus++; @@ -57,10 +56,9 @@ int GetNumAvailableGPUs() { int64 AvailableGPUMemory(int gpu_id) { #if GOOGLE_CUDA // Look up the device, to see its attributes. 
- perftools::gputools::Platform* gpu_platform = GPUMachineManager(); + se::Platform* gpu_platform = GPUMachineManager(); CHECK_LT(gpu_id, gpu_platform->VisibleDeviceCount()); - perftools::gputools::StreamExecutor* se = - gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie(); + se::StreamExecutor* se = gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie(); int64 total_memory, available_memory; CHECK(se->DeviceMemoryUsage(&available_memory, &total_memory)); diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index c581d1451f..ba38e1a188 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -156,10 +156,10 @@ class AvgPoolingOp : public UnaryOp { TensorShape output_shape = params.forward_output_shape(); if (data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, - stride_, padding_, data_format_, tensor_in, output_shape, - /*propagate_nans=*/false); + DnnPoolingOp::Compute(context, se::dnn::PoolingMode::kAverage, ksize_, + stride_, padding_, data_format_, tensor_in, + output_shape, + /*propagate_nans=*/false); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, @@ -417,10 +417,10 @@ class AvgPoolingGradOp : public OpKernel { output_shape.AddDim(shape_vec(i)); } - DnnPoolingGradOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, - stride_, padding_, data_format_, nullptr, nullptr, out_backprop, - output_shape, /*propagate_nans=*/false); + DnnPoolingGradOp::Compute(context, se::dnn::PoolingMode::kAverage, + ksize_, stride_, padding_, data_format_, + nullptr, nullptr, out_backprop, output_shape, + /*propagate_nans=*/false); } private: @@ -547,10 +547,10 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel { output->flat().data(), // bottom_diff context->eigen_gpu_device()); // d } else { - DnnPoolingGradOp::Compute( - context, 
perftools::gputools::dnn::PoolingMode::kAverage, ksize_, - stride_, padding_, data_format_, nullptr, nullptr, out_backprop, - output_shape, /*propagate_nans=*/false); + DnnPoolingGradOp::Compute(context, se::dnn::PoolingMode::kAverage, + ksize_, stride_, padding_, data_format_, + nullptr, nullptr, out_backprop, output_shape, + /*propagate_nans=*/false); } } diff --git a/tensorflow/core/kernels/batch_matmul_op_impl.h b/tensorflow/core/kernels/batch_matmul_op_impl.h index 43e716c542..a1c03f9918 100644 --- a/tensorflow/core/kernels/batch_matmul_op_impl.h +++ b/tensorflow/core/kernels/batch_matmul_op_impl.h @@ -245,35 +245,35 @@ struct LaunchBatchMatMul { namespace { template -perftools::gputools::DeviceMemory AsDeviceMemory(const T* cuda_memory) { - perftools::gputools::DeviceMemoryBase wrapped(const_cast(cuda_memory)); - perftools::gputools::DeviceMemory typed(wrapped); +se::DeviceMemory AsDeviceMemory(const T* cuda_memory) { + se::DeviceMemoryBase wrapped(const_cast(cuda_memory)); + se::DeviceMemory typed(wrapped); return typed; } -class CublasScratchAllocator : public perftools::gputools::ScratchAllocator { +class CublasScratchAllocator : public se::ScratchAllocator { public: - using Stream = ::perftools::gputools::Stream; - using DeviceMemoryBytes = ::perftools::gputools::DeviceMemory; + using Stream = se::Stream; + using DeviceMemoryBytes = se::DeviceMemory; CublasScratchAllocator(OpKernelContext* context) : context_(context) {} int64 GetMemoryLimitInBytes(Stream* stream) override { return -1; } - perftools::gputools::port::StatusOr AllocateBytes( + se::port::StatusOr AllocateBytes( Stream* stream, int64 byte_size) override { Tensor temporary_memory; Status allocation_status(context_->allocate_temp( DT_UINT8, TensorShape({byte_size}), &temporary_memory)); if (!allocation_status.ok()) { - return perftools::gputools::port::StatusOr( + return se::port::StatusOr( DeviceMemoryBytes::MakeFromByteSize(nullptr, 0)); } // Hold the reference of the allocated tensors until 
the end of the // allocator. allocated_tensors_.push_back(temporary_memory); - return perftools::gputools::port::StatusOr( + return se::port::StatusOr( DeviceMemoryBytes::MakeFromByteSize( temporary_memory.flat().data(), temporary_memory.flat().size())); @@ -289,12 +289,11 @@ template struct LaunchBatchMatMul { static void Launch(OpKernelContext* context, const Tensor& in_x, const Tensor& in_y, bool adj_x, bool adj_y, Tensor* out) { - constexpr perftools::gputools::blas::Transpose kTranspose = - is_complex::value - ? perftools::gputools::blas::Transpose::kConjugateTranspose - : perftools::gputools::blas::Transpose::kTranspose; - perftools::gputools::blas::Transpose trans[] = { - perftools::gputools::blas::Transpose::kNoTranspose, kTranspose}; + constexpr se::blas::Transpose kTranspose = + is_complex::value ? se::blas::Transpose::kConjugateTranspose + : se::blas::Transpose::kTranspose; + se::blas::Transpose trans[] = {se::blas::Transpose::kNoTranspose, + kTranspose}; const uint64 m = in_x.dim_size(adj_x ? 2 : 1); const uint64 k = in_x.dim_size(adj_x ? 1 : 2); const uint64 n = in_y.dim_size(adj_y ? 1 : 2); @@ -305,7 +304,7 @@ struct LaunchBatchMatMul { auto* stream = context->op_device_context()->stream(); OP_REQUIRES(context, stream, errors::Internal("No GPU stream available.")); - typedef perftools::gputools::DeviceMemory DeviceMemoryType; + typedef se::DeviceMemory DeviceMemoryType; std::vector a_device_memory; std::vector b_device_memory; std::vector c_device_memory; @@ -340,19 +339,16 @@ struct LaunchBatchMatMul { // This is a regular matrix*matrix or matrix*vector multiply. Avoid the // overhead of the scratch allocator and the batch interface. 
if (n == 1 && - blas_transpose_b != - perftools::gputools::blas::Transpose::kConjugateTranspose && - blas_transpose_a != - perftools::gputools::blas::Transpose::kConjugateTranspose) { + blas_transpose_b != se::blas::Transpose::kConjugateTranspose && + blas_transpose_a != se::blas::Transpose::kConjugateTranspose) { // This is a matrix*vector multiply so use GEMV to compute A * b. // Here we are multiplying in the natural order, so we have to flip // the transposition flag to compensate for the tensor being stored // row-major. Since GEMV doesn't provide a way to just conjugate an // argument, we have to defer those cases to GEMM below. - auto gemv_trans_a = - blas_transpose_a == perftools::gputools::blas::Transpose::kTranspose - ? perftools::gputools::blas::Transpose::kNoTranspose - : perftools::gputools::blas::Transpose::kTranspose; + auto gemv_trans_a = blas_transpose_a == se::blas::Transpose::kTranspose + ? se::blas::Transpose::kNoTranspose + : se::blas::Transpose::kTranspose; bool blas_launch_status = stream ->ThenBlasGemv(gemv_trans_a, adj_x ? m : k, adj_x ? 
k : m, diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc index 368993c827..9fda7169a8 100644 --- a/tensorflow/core/kernels/bias_op.cc +++ b/tensorflow/core/kernels/bias_op.cc @@ -393,8 +393,8 @@ class BiasGradOp : public OpKernel { if (channel == 0) return; auto* stream = context->op_device_context()->stream(); OP_REQUIRES(context, stream, errors::Internal("No GPU stream available.")); - perftools::gputools::DeviceMemoryBase output_ptr( - output->flat().data(), output->NumElements() * sizeof(T)); + se::DeviceMemoryBase output_ptr(output->flat().data(), + output->NumElements() * sizeof(T)); stream->ThenMemZero(&output_ptr, output->NumElements() * sizeof(T)); if (output_backprop.NumElements() > 0) { BiasGradGPU::compute(context->template eigen_device(), diff --git a/tensorflow/core/kernels/check_numerics_op.cc b/tensorflow/core/kernels/check_numerics_op.cc index d3b67f4614..c3c0c50007 100644 --- a/tensorflow/core/kernels/check_numerics_op.cc +++ b/tensorflow/core/kernels/check_numerics_op.cc @@ -139,7 +139,7 @@ class CheckNumericsOp : public AsyncOpKernel { OP_REQUIRES_ASYNC(context, stream != nullptr, errors::Internal("No GPU stream available."), done); - perftools::gputools::DeviceMemoryBase abnormal_detected_ptr( + se::DeviceMemoryBase abnormal_detected_ptr( abnormal_detected.flat().data(), abnormal_detected.flat().size()); stream->ThenMemset32(&abnormal_detected_ptr, 0, @@ -174,8 +174,8 @@ class CheckNumericsOp : public AsyncOpKernel { TensorReference abnormal_detected_ref(abnormal_detected); auto check_cb = [this, stream, abnormal_detected_ref, abnormal_detected_host, context, done]() { - ::perftools::gputools::cuda::ScopedActivateExecutorContext - scoped_activation{stream->parent()}; + se::cuda::ScopedActivateExecutorContext scoped_activation{ + stream->parent()}; auto abnormal_detected_host_flat = abnormal_detected_host.flat(); int is_nan = abnormal_detected_host_flat(0); int is_inf = abnormal_detected_host_flat(1); diff 
--git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index f3b91494b9..ef1e73e5ab 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -532,7 +532,7 @@ struct ConvBackwardFilterAutoTuneGroup { static string name() { return "ConvBwdFilter"; } }; typedef AutoTuneSingleton + se::dnn::AlgorithmConfig> AutoTuneConvBwdFilter; // Backprop for filter. @@ -636,9 +636,9 @@ void LaunchConv2DBackpropFilterOp::operator()( const Tensor& out_backprop, const Tensor& input, int row_dilation, int col_dilation, int row_stride, int col_stride, const Padding& padding, Tensor* filter_backprop, TensorFormat data_format) { - using perftools::gputools::dnn::AlgorithmConfig; - using perftools::gputools::dnn::AlgorithmDesc; - using perftools::gputools::dnn::ProfileResult; + using se::dnn::AlgorithmConfig; + using se::dnn::AlgorithmDesc; + using se::dnn::ProfileResult; std::vector dilations(4, 1); dilations[GetTensorDimIndex(data_format, 'H')] = row_dilation; @@ -721,9 +721,9 @@ void LaunchConv2DBackpropFilterOp::operator()( bool blas_launch_status = stream - ->ThenBlasGemm(perftools::gputools::blas::Transpose::kNoTranspose, - perftools::gputools::blas::Transpose::kTranspose, n, - m, k, 1.0f, a_ptr, n, b_ptr, m, 0.0f, &c_ptr, n) + ->ThenBlasGemm(se::blas::Transpose::kNoTranspose, + se::blas::Transpose::kTranspose, n, m, k, 1.0f, + a_ptr, n, b_ptr, m, 0.0f, &c_ptr, n) .ok(); if (!blas_launch_status) { ctx->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, @@ -751,9 +751,9 @@ void LaunchConv2DBackpropFilterOp::operator()( bool blas_launch_status = stream - ->ThenBlasGemm(perftools::gputools::blas::Transpose::kNoTranspose, - perftools::gputools::blas::Transpose::kTranspose, n, - m, k, 1.0f, b_ptr, n, a_ptr, m, 0.0f, &c_ptr, n) + ->ThenBlasGemm(se::blas::Transpose::kNoTranspose, + se::blas::Transpose::kTranspose, n, m, k, 1.0f, + b_ptr, n, a_ptr, m, 0.0f, &c_ptr, n) 
.ok(); if (!blas_launch_status) { ctx->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, @@ -787,24 +787,24 @@ void LaunchConv2DBackpropFilterOp::operator()( CHECK(padding_rows >= 0 && padding_cols >= 0) << "Negative row or col paddings: (" << padding_rows << ", " << padding_cols << ")"; - perftools::gputools::dnn::BatchDescriptor input_desc; + se::dnn::BatchDescriptor input_desc; input_desc.set_count(dims.batch_size) .set_height(GetTensorDim(compatible_input, data_format, 'H')) .set_width(GetTensorDim(compatible_input, data_format, 'W')) .set_feature_map_count(dims.in_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor output_desc; + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::BatchDescriptor output_desc; output_desc.set_count(dims.batch_size) .set_height(dims.spatial_dims[0].output_size) .set_width(dims.spatial_dims[1].output_size) .set_feature_map_count(dims.out_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::FilterDescriptor filter_desc; + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::FilterDescriptor filter_desc; filter_desc.set_input_filter_height(dims.spatial_dims[0].filter_size) .set_input_filter_width(dims.spatial_dims[1].filter_size) .set_input_feature_map_count(dims.in_depth) .set_output_feature_map_count(dims.out_depth); - perftools::gputools::dnn::ConvolutionDescriptor conv_desc; + se::dnn::ConvolutionDescriptor conv_desc; conv_desc.set_vertical_dilation_rate(dims.spatial_dims[0].dilation) .set_horizontal_dilation_rate(dims.spatial_dims[1].dilation) .set_vertical_filter_stride(dims.spatial_dims[0].stride) diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 66d15c6e78..35f2676023 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -604,7 +604,7 @@ struct 
ConvBackwardDataAutoTuneGroup { static string name() { return "ConvBwdData"; } }; typedef AutoTuneSingleton + se::dnn::AlgorithmConfig> AutoTuneConvBwdData; // Backprop for input. @@ -705,9 +705,9 @@ void LaunchConv2DBackpropInputOp::operator()( const Tensor& out_backprop, const Tensor& filter, int row_dilation, int col_dilation, int row_stride, int col_stride, const Padding& padding, Tensor* in_backprop, TensorFormat data_format) { - using perftools::gputools::dnn::AlgorithmConfig; - using perftools::gputools::dnn::AlgorithmDesc; - using perftools::gputools::dnn::ProfileResult; + using se::dnn::AlgorithmConfig; + using se::dnn::AlgorithmDesc; + using se::dnn::ProfileResult; std::vector strides(4, 1); std::vector dilations(4, 1); @@ -778,8 +778,8 @@ void LaunchConv2DBackpropInputOp::operator()( auto c_ptr = AsDeviceMemory(in_backprop->template flat().data(), in_backprop->template flat().size()); - auto transpose = perftools::gputools::blas::Transpose::kTranspose; - auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + auto transpose = se::blas::Transpose::kTranspose; + auto no_transpose = se::blas::Transpose::kNoTranspose; bool blas_launch_status = stream @@ -810,8 +810,8 @@ void LaunchConv2DBackpropInputOp::operator()( auto c_ptr = AsDeviceMemory(in_backprop->template flat().data(), in_backprop->template flat().size()); - auto transpose = perftools::gputools::blas::Transpose::kTranspose; - auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + auto transpose = se::blas::Transpose::kTranspose; + auto no_transpose = se::blas::Transpose::kNoTranspose; bool blas_launch_status = stream @@ -841,24 +841,24 @@ void LaunchConv2DBackpropInputOp::operator()( CHECK(padding_rows >= 0 && padding_cols >= 0) << "Negative row or col paddings: (" << padding_rows << ", " << padding_cols << ")"; - perftools::gputools::dnn::BatchDescriptor input_desc; + se::dnn::BatchDescriptor input_desc; input_desc.set_count(dims.batch_size) 
.set_height(GetTensorDim(compatible_input_shape, data_format, 'H')) .set_width(GetTensorDim(compatible_input_shape, data_format, 'W')) .set_feature_map_count(dims.in_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor output_desc; + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::BatchDescriptor output_desc; output_desc.set_count(dims.batch_size) .set_height(dims.spatial_dims[0].output_size) .set_width(dims.spatial_dims[1].output_size) .set_feature_map_count(dims.out_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::FilterDescriptor filter_desc; + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::FilterDescriptor filter_desc; filter_desc.set_input_filter_height(dims.spatial_dims[0].filter_size) .set_input_filter_width(dims.spatial_dims[1].filter_size) .set_input_feature_map_count(dims.in_depth) .set_output_feature_map_count(dims.out_depth); - perftools::gputools::dnn::ConvolutionDescriptor conv_desc; + se::dnn::ConvolutionDescriptor conv_desc; conv_desc.set_vertical_dilation_rate(dims.spatial_dims[0].dilation) .set_horizontal_dilation_rate(dims.spatial_dims[1].dilation) .set_vertical_filter_stride(dims.spatial_dims[0].stride) diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 092e859a5b..9edc6d416e 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -35,7 +35,7 @@ limitations under the License. 
#if GOOGLE_CUDA #include "tensorflow/core/platform/stream_executor.h" -using perftools::gputools::dnn::DimIndex; +using stream_executor::dnn::DimIndex; #endif namespace tensorflow { @@ -468,7 +468,7 @@ struct Conv3dBackwardDataAutoTuneGroup { static string name() { return "Conv3dBwdData"; } }; typedef AutoTuneSingleton + se::dnn::AlgorithmConfig> AutoTuneConv3dBwdData; template @@ -554,8 +554,8 @@ class Conv3DBackpropInputOp : public OpKernel { auto c_ptr = AsDeviceMemory(in_backprop->template flat().data(), in_backprop->template flat().size()); - auto transpose = perftools::gputools::blas::Transpose::kTranspose; - auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + auto transpose = se::blas::Transpose::kTranspose; + auto no_transpose = se::blas::Transpose::kNoTranspose; bool blas_launch_status = stream @@ -582,8 +582,8 @@ class Conv3DBackpropInputOp : public OpKernel { auto c_ptr = AsDeviceMemory(in_backprop->template flat().data(), in_backprop->template flat().size()); - auto transpose = perftools::gputools::blas::Transpose::kTranspose; - auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + auto transpose = se::blas::Transpose::kTranspose; + auto no_transpose = se::blas::Transpose::kNoTranspose; bool blas_launch_status = stream @@ -629,27 +629,27 @@ class Conv3DBackpropInputOp : public OpKernel { CHECK(padding_rows >= 0 && padding_cols >= 0 && padding_planes >= 0) << "Negative paddings: (" << padding_rows << ", " << padding_cols << ", " << padding_planes << ")"; - perftools::gputools::dnn::BatchDescriptor input_desc(3); + se::dnn::BatchDescriptor input_desc(3); input_desc.set_count(batch) .set_spatial_dim(DimIndex::X, compatible_input_shape.dim_size(4)) .set_spatial_dim(DimIndex::Y, compatible_input_shape.dim_size(3)) .set_spatial_dim(DimIndex::Z, compatible_input_shape.dim_size(2)) .set_feature_map_count(in_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - 
perftools::gputools::dnn::BatchDescriptor output_desc(3); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::BatchDescriptor output_desc(3); output_desc.set_count(batch) .set_spatial_dim(DimIndex::X, output_cols) .set_spatial_dim(DimIndex::Y, output_rows) .set_spatial_dim(DimIndex::Z, output_planes) .set_feature_map_count(out_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::FilterDescriptor filter_desc(3); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::FilterDescriptor filter_desc(3); filter_desc.set_spatial_dim(DimIndex::X, filter_size[2]) .set_spatial_dim(DimIndex::Y, filter_size[1]) .set_spatial_dim(DimIndex::Z, filter_size[0]) .set_input_feature_map_count(in_depth) .set_output_feature_map_count(out_depth); - perftools::gputools::dnn::ConvolutionDescriptor conv_desc(3); + se::dnn::ConvolutionDescriptor conv_desc(3); conv_desc.set_dilation_rate(DimIndex::X, dilations[2]) .set_dilation_rate(DimIndex::Y, dilations[1]) .set_dilation_rate(DimIndex::Z, dilations[0]) @@ -725,9 +725,9 @@ class Conv3DBackpropInputOp : public OpKernel { device_id, }; - using perftools::gputools::dnn::AlgorithmConfig; - using perftools::gputools::dnn::AlgorithmDesc; - using perftools::gputools::dnn::ProfileResult; + using se::dnn::AlgorithmConfig; + using se::dnn::AlgorithmDesc; + using se::dnn::ProfileResult; AlgorithmConfig algorithm_config; if (cudnn_use_autotune_ && !AutoTuneConv3dBwdData::GetInstance()->Find( conv_parameters, &algorithm_config)) { @@ -839,7 +839,7 @@ struct Conv3dBackwardFilterAutoTuneGroup { static string name() { return "Conv3dBwdFilter"; } }; typedef AutoTuneSingleton + se::dnn::AlgorithmConfig> AutoTuneConv3dBwdFilter; template @@ -941,9 +941,9 @@ class Conv3DBackpropFilterOp : public OpKernel { bool blas_launch_status = stream - ->ThenBlasGemm(perftools::gputools::blas::Transpose::kNoTranspose, - perftools::gputools::blas::Transpose::kTranspose, - n, m, k, 1.0f, a_ptr, n, b_ptr, m, 
0.0f, &c_ptr, n) + ->ThenBlasGemm(se::blas::Transpose::kNoTranspose, + se::blas::Transpose::kTranspose, n, m, k, 1.0f, + a_ptr, n, b_ptr, m, 0.0f, &c_ptr, n) .ok(); if (!blas_launch_status) { context->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, @@ -967,9 +967,9 @@ class Conv3DBackpropFilterOp : public OpKernel { bool blas_launch_status = stream - ->ThenBlasGemm(perftools::gputools::blas::Transpose::kNoTranspose, - perftools::gputools::blas::Transpose::kTranspose, - n, m, k, 1.0f, b_ptr, n, a_ptr, m, 0.0f, &c_ptr, n) + ->ThenBlasGemm(se::blas::Transpose::kNoTranspose, + se::blas::Transpose::kTranspose, n, m, k, 1.0f, + b_ptr, n, a_ptr, m, 0.0f, &c_ptr, n) .ok(); if (!blas_launch_status) { context->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, @@ -1014,7 +1014,7 @@ class Conv3DBackpropFilterOp : public OpKernel { CHECK(padding_rows >= 0 && padding_cols >= 0 && padding_planes >= 0) << "Negative paddings: (" << padding_rows << ", " << padding_cols << ", " << padding_planes << ")"; - perftools::gputools::dnn::BatchDescriptor input_desc(3); + se::dnn::BatchDescriptor input_desc(3); input_desc.set_count(batch) .set_spatial_dim(DimIndex::X, GetTensorDim(compatible_input, data_format_, '2')) @@ -1023,21 +1023,21 @@ class Conv3DBackpropFilterOp : public OpKernel { .set_spatial_dim(DimIndex::Z, GetTensorDim(compatible_input, data_format_, '0')) .set_feature_map_count(in_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor output_desc(3); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::BatchDescriptor output_desc(3); output_desc.set_count(batch) .set_spatial_dim(DimIndex::X, output_cols) .set_spatial_dim(DimIndex::Y, output_rows) .set_spatial_dim(DimIndex::Z, output_planes) .set_feature_map_count(out_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::FilterDescriptor filter_desc(3); + 
.set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::FilterDescriptor filter_desc(3); filter_desc.set_spatial_dim(DimIndex::X, filter_size[2]) .set_spatial_dim(DimIndex::Y, filter_size[1]) .set_spatial_dim(DimIndex::Z, filter_size[0]) .set_input_feature_map_count(in_depth) .set_output_feature_map_count(out_depth); - perftools::gputools::dnn::ConvolutionDescriptor conv_desc(3); + se::dnn::ConvolutionDescriptor conv_desc(3); conv_desc.set_dilation_rate(DimIndex::X, dilations[2]) .set_dilation_rate(DimIndex::Y, dilations[1]) .set_dilation_rate(DimIndex::Z, dilations[0]) @@ -1121,9 +1121,9 @@ class Conv3DBackpropFilterOp : public OpKernel { device_id, }; - using perftools::gputools::dnn::AlgorithmConfig; - using perftools::gputools::dnn::AlgorithmDesc; - using perftools::gputools::dnn::ProfileResult; + using se::dnn::AlgorithmConfig; + using se::dnn::AlgorithmDesc; + using se::dnn::ProfileResult; AlgorithmConfig algorithm_config; if (cudnn_use_autotune_ && !AutoTuneConv3dBwdFilter::GetInstance()->Find( conv_parameters, &algorithm_config)) { diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index f0888c655f..c6d36b40fe 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -475,7 +475,7 @@ struct ConvAutoTuneGroup { static string name() { return "Conv"; } }; typedef AutoTuneSingleton + se::dnn::AlgorithmConfig> AutoTuneConv; template @@ -484,9 +484,9 @@ void LaunchConv2DOp::operator()( const Tensor& input_param, const Tensor& filter, int row_dilation, int col_dilation, int row_stride, int col_stride, const Padding& padding, Tensor* output, TensorFormat data_format) { - using perftools::gputools::dnn::AlgorithmConfig; - using perftools::gputools::dnn::AlgorithmDesc; - using perftools::gputools::dnn::ProfileResult; + using se::dnn::AlgorithmConfig; + using se::dnn::AlgorithmDesc; + using se::dnn::ProfileResult; auto* stream = ctx->op_device_context()->stream(); OP_REQUIRES(ctx, stream, 
errors::Internal("No GPU stream available.")); @@ -514,7 +514,7 @@ void LaunchConv2DOp::operator()( auto c_ptr = AsDeviceMemory(output->template flat().data(), output->template flat().size()); - auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + auto no_transpose = se::blas::Transpose::kNoTranspose; bool blas_launch_status = stream ->ThenBlasGemm(no_transpose, no_transpose, n, m, k, 1.0f, b_ptr, n, @@ -543,7 +543,7 @@ void LaunchConv2DOp::operator()( auto c_ptr = AsDeviceMemory(output->template flat().data(), output->template flat().size()); - auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + auto no_transpose = se::blas::Transpose::kNoTranspose; bool blas_launch_status = stream ->ThenBlasGemm(no_transpose, no_transpose, n, m, k, 1.0f, b_ptr, n, @@ -629,24 +629,24 @@ void LaunchConv2DOp::operator()( CHECK(padding_rows >= 0 && padding_cols >= 0) << "Negative row or col paddings: (" << padding_rows << ", " << padding_cols << ")"; - perftools::gputools::dnn::BatchDescriptor input_desc; + se::dnn::BatchDescriptor input_desc; input_desc.set_count(in_batch) .set_feature_map_count(in_depths) .set_height(in_rows) .set_width(in_cols) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor output_desc; + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::BatchDescriptor output_desc; output_desc.set_count(out_batch) .set_height(out_rows) .set_width(out_cols) .set_feature_map_count(out_depths) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::FilterDescriptor filter_desc; + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::FilterDescriptor filter_desc; filter_desc.set_input_filter_height(filter.dim_size(0)) .set_input_filter_width(filter.dim_size(1)) .set_input_feature_map_count(filter.dim_size(2)) .set_output_feature_map_count(filter.dim_size(3)); - perftools::gputools::dnn::ConvolutionDescriptor conv_desc; + 
se::dnn::ConvolutionDescriptor conv_desc; conv_desc.set_vertical_dilation_rate(row_dilation) .set_horizontal_dilation_rate(col_dilation) .set_vertical_filter_stride(row_stride) diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 48dd3c9eb0..9ec16be67d 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -34,7 +34,7 @@ limitations under the License. #if GOOGLE_CUDA #include "tensorflow/core/platform/stream_executor.h" -using perftools::gputools::dnn::DimIndex; +using stream_executor::dnn::DimIndex; #endif namespace tensorflow { @@ -192,7 +192,7 @@ struct Conv3dAutoTuneGroup { static string name() { return "Conv3d"; } }; typedef AutoTuneSingleton + se::dnn::AlgorithmConfig> AutoTuneConv3d; // TODO(mjanusz): Share logic with 2d implementation as much as possible. @@ -250,7 +250,7 @@ struct LaunchConvOp { auto c_ptr = AsDeviceMemory(output->template flat().data(), output->template flat().size()); - auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + auto no_transpose = se::blas::Transpose::kNoTranspose; bool blas_launch_status = stream ->ThenBlasGemm(no_transpose, no_transpose, n, m, k, 1.0f, b_ptr, @@ -277,7 +277,7 @@ struct LaunchConvOp { auto c_ptr = AsDeviceMemory(output->template flat().data(), output->template flat().size()); - auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + auto no_transpose = se::blas::Transpose::kNoTranspose; bool blas_launch_status = stream ->ThenBlasGemm(no_transpose, no_transpose, n, m, k, 1.0f, b_ptr, @@ -346,27 +346,27 @@ struct LaunchConvOp { CHECK(pad_rows >= 0 && pad_cols >= 0 && pad_planes >= 0) << "Negative paddings: (" << pad_rows << ", " << pad_cols << ", " << pad_planes << ")"; - perftools::gputools::dnn::BatchDescriptor input_desc(3); + se::dnn::BatchDescriptor input_desc(3); input_desc.set_count(in_batch) .set_feature_map_count(in_depth) .set_spatial_dim(DimIndex::X, in_cols) 
.set_spatial_dim(DimIndex::Y, in_rows) .set_spatial_dim(DimIndex::Z, in_planes) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor output_desc(3); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::BatchDescriptor output_desc(3); output_desc.set_count(in_batch) .set_spatial_dim(DimIndex::X, out_cols) .set_spatial_dim(DimIndex::Y, out_rows) .set_spatial_dim(DimIndex::Z, out_planes) .set_feature_map_count(out_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::FilterDescriptor filter_desc(3); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::FilterDescriptor filter_desc(3); filter_desc.set_spatial_dim(DimIndex::X, filter_cols) .set_spatial_dim(DimIndex::Y, filter_rows) .set_spatial_dim(DimIndex::Z, filter_planes) .set_input_feature_map_count(in_depth) .set_output_feature_map_count(out_depth); - perftools::gputools::dnn::ConvolutionDescriptor conv_desc(3); + se::dnn::ConvolutionDescriptor conv_desc(3); conv_desc.set_dilation_rate(DimIndex::X, dilations[2]) .set_dilation_rate(DimIndex::Y, dilations[1]) .set_dilation_rate(DimIndex::Z, dilations[0]) @@ -424,9 +424,9 @@ struct LaunchConvOp { device_id, }; - using perftools::gputools::dnn::AlgorithmConfig; - using perftools::gputools::dnn::AlgorithmDesc; - using perftools::gputools::dnn::ProfileResult; + using se::dnn::AlgorithmConfig; + using se::dnn::AlgorithmDesc; + using se::dnn::ProfileResult; AlgorithmConfig algorithm_config; diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h index 7f9cfec981..4215c4541c 100644 --- a/tensorflow/core/kernels/conv_ops_gpu.h +++ b/tensorflow/core/kernels/conv_ops_gpu.h @@ -36,25 +36,23 @@ int64 GetCudnnWorkspaceLimit(const string& envvar_in_mb, // A class to provide scratch-space allocator for Stream-Executor Cudnn // callback. 
TensorFlow is responsible for releasing the temporary buffers after // the kernel finishes. -class CudnnScratchAllocator : public perftools::gputools::ScratchAllocator { +class CudnnScratchAllocator : public se::ScratchAllocator { public: virtual ~CudnnScratchAllocator() {} CudnnScratchAllocator(int64 memory_limit, OpKernelContext* context) : memory_limit_(memory_limit), total_byte_size_(0), context_(context) {} - int64 GetMemoryLimitInBytes(perftools::gputools::Stream* stream) override { + int64 GetMemoryLimitInBytes(se::Stream* stream) override { return memory_limit_; } - perftools::gputools::port::StatusOr> - AllocateBytes(perftools::gputools::Stream* stream, int64 byte_size) override { + se::port::StatusOr> AllocateBytes( + se::Stream* stream, int64 byte_size) override { Tensor temporary_memory; if (byte_size < 0) { - return perftools::gputools::port::Status{ - perftools::gputools::port::error::INVALID_ARGUMENT, - "Requested negative byte size!"}; + return se::port::Status{se::port::error::INVALID_ARGUMENT, + "Requested negative byte size!"}; } if (byte_size > memory_limit_) { - return perftools::gputools::port::StatusOr< - perftools::gputools::DeviceMemory>(); + return se::port::StatusOr>(); } AllocationAttributes allocation_attr; allocation_attr.no_retry_on_failure = true; @@ -62,15 +60,13 @@ class CudnnScratchAllocator : public perftools::gputools::ScratchAllocator { DT_UINT8, TensorShape({byte_size}), &temporary_memory, AllocatorAttributes(), allocation_attr)); if (!allocation_status.ok()) { - return perftools::gputools::port::StatusOr< - perftools::gputools::DeviceMemory>(); + return se::port::StatusOr>(); } // Hold the reference of the allocated tensors until the end of the // allocator. 
allocated_tensors_.push_back(temporary_memory); total_byte_size_ += byte_size; - return perftools::gputools::port::StatusOr< - perftools::gputools::DeviceMemory>( + return se::port::StatusOr>( AsDeviceMemory(temporary_memory.flat().data(), temporary_memory.flat().size())); } @@ -141,9 +137,9 @@ class ConvParameters { // for certain input parameters so as to avoid a bug in cuDNNv5 and cuDNNv6. template bool ShouldIncludeWinogradNonfusedAlgo( - perftools::gputools::StreamExecutor* stream_exec) const { + se::StreamExecutor* stream_exec) const { // Skip this check for cuDNN 7 and newer. - perftools::gputools::port::StatusOr> version = + se::port::StatusOr> version = stream_exec->AsDnn()->GetVersion(); if (version.ok() && std::get<0>(version.ValueOrDie()) >= 7) { return true; diff --git a/tensorflow/core/kernels/crop_and_resize_op.cc b/tensorflow/core/kernels/crop_and_resize_op.cc index 45cc2fbbb8..54ef9c6fb4 100644 --- a/tensorflow/core/kernels/crop_and_resize_op.cc +++ b/tensorflow/core/kernels/crop_and_resize_op.cc @@ -39,17 +39,16 @@ limitations under the License. 
#include "tensorflow/core/platform/cuda.h" #include "tensorflow/core/platform/stream_executor.h" -using ::perftools::gputools::cuda::ScopedActivateExecutorContext; +using stream_executor::cuda::ScopedActivateExecutorContext; #endif // GOOGLE_CUDA namespace tensorflow { +namespace { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; using Callback = std::function; -namespace { - static inline Status ParseAndCheckBoxSizes(const Tensor& boxes, const Tensor& box_index, int* num_boxes) { @@ -753,8 +752,7 @@ inline void RunIfBoxIndexIsValid( context->allocate_temp(DataTypeToEnum::value, TensorShape({}), &isvalid_host_tensor, alloc_attr), done); - perftools::gputools::DeviceMemoryBase wrapped(isvalid_dev.data(), - sizeof(bool)); + se::DeviceMemoryBase wrapped(isvalid_dev.data(), sizeof(bool)); const bool status = stream ->ThenMemcpy( diff --git a/tensorflow/core/kernels/cuda_device_array.h b/tensorflow/core/kernels/cuda_device_array.h index e7a5db0683..74dc298c7a 100644 --- a/tensorflow/core/kernels/cuda_device_array.h +++ b/tensorflow/core/kernels/cuda_device_array.h @@ -80,7 +80,7 @@ class CudaDeviceArrayOnHost { TensorReference tensor_ref(out_of_line_values_on_host_); TF_RETURN_IF_ERROR(context_->allocate_temp( DT_INT8, TensorShape{total_bytes_}, &out_of_line_values_on_gpu_)); - perftools::gputools::DeviceMemoryBase output_values_base{ + se::DeviceMemoryBase output_values_base{ out_of_line_values_on_gpu_.flat().data(), static_cast(total_bytes_)}; stream->ThenMemcpy(&output_values_base, diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/kernels/cuda_solvers.cc index 6cec032f94..a857bd3ce4 100644 --- a/tensorflow/core/kernels/cuda_solvers.cc +++ b/tensorflow/core/kernels/cuda_solvers.cc @@ -35,8 +35,6 @@ #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" -using ::perftools::gputools::cuda::ScopedActivateExecutorContext; - // The CUDA cublas_api.h API contains const-correctness 
errors. Instead of // casting away constness on our data, we instead reinterpret the CuBLAS // functions as what they were clearly meant to be, and thus we can call @@ -80,10 +78,12 @@ using matinv_Z = cublasStatus_t(cublasContext*, int, const double2* const*, int, namespace tensorflow { namespace { +using se::cuda::ScopedActivateExecutorContext; + inline bool CopyHostToDevice(OpKernelContext* context, void* dst, const void* src, uint64 bytes) { auto stream = context->op_device_context()->stream(); - perftools::gputools::DeviceMemoryBase wrapped_dst(dst); + se::DeviceMemoryBase wrapped_dst(dst); return stream->ThenMemcpy(&wrapped_dst, src, bytes).ok(); } diff --git a/tensorflow/core/kernels/cuda_solvers.h b/tensorflow/core/kernels/cuda_solvers.h index ecfa23750c..b2e8ee23a9 100644 --- a/tensorflow/core/kernels/cuda_solvers.h +++ b/tensorflow/core/kernels/cuda_solvers.h @@ -398,7 +398,7 @@ class DeviceLapackInfo : public ScratchSpace { CHECK(success != nullptr); HostLapackInfo copy(context(), size(), debug_info()); auto stream = context()->op_device_context()->stream(); - perftools::gputools::DeviceMemoryBase wrapped_src( + se::DeviceMemoryBase wrapped_src( static_cast(const_cast(this->data()))); *success = stream->ThenMemcpy(copy.mutable_data(), wrapped_src, this->bytes()) diff --git a/tensorflow/core/kernels/cudnn_pooling_gpu.cc b/tensorflow/core/kernels/cudnn_pooling_gpu.cc index 5939ecdf62..d2b9c9edaa 100644 --- a/tensorflow/core/kernels/cudnn_pooling_gpu.cc +++ b/tensorflow/core/kernels/cudnn_pooling_gpu.cc @@ -31,12 +31,13 @@ namespace tensorflow { #if GOOGLE_CUDA template -void DnnPooling3dOp::Compute( - OpKernelContext* context, - perftools::gputools::dnn::PoolingMode pooling_mode, - const std::array& window, const std::array& stride, - const std::array& padding, TensorFormat data_format, - const Tensor& tensor_in, Tensor* output) { +void DnnPooling3dOp::Compute(OpKernelContext* context, + se::dnn::PoolingMode pooling_mode, + const std::array& window, + 
const std::array& stride, + const std::array& padding, + TensorFormat data_format, + const Tensor& tensor_in, Tensor* output) { const auto in_shape = tensor_in.shape(); const auto out_shape = output->shape(); @@ -67,18 +68,18 @@ void DnnPooling3dOp::Compute( transformed_output = *output; } - perftools::gputools::dnn::PoolingDescriptor pooling_desc(3); + se::dnn::PoolingDescriptor pooling_desc(3); pooling_desc.set_pooling_mode(pooling_mode); - perftools::gputools::dnn::BatchDescriptor input_desc(3); + se::dnn::BatchDescriptor input_desc(3); input_desc.set_count(in_batch) .set_feature_map_count(in_features) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor output_desc(3); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); + se::dnn::BatchDescriptor output_desc(3); output_desc.set_count(in_batch) .set_feature_map_count(in_features) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); for (size_t i = 0; i < window.size(); ++i) { - const auto dim_i = static_cast(i); + const auto dim_i = static_cast(i); pooling_desc.set_window(dim_i, window[i]); pooling_desc.set_stride(dim_i, stride[i]); pooling_desc.set_padding(dim_i, padding[i]); @@ -115,14 +116,13 @@ void DnnPooling3dOp::Compute( template void DnnPooling3dGradOp::Compute( - OpKernelContext* context, - perftools::gputools::dnn::PoolingMode pooling_mode, + OpKernelContext* context, se::dnn::PoolingMode pooling_mode, const std::array& window, const std::array& stride, const std::array& padding, const std::array& output_size, TensorFormat data_format, const Tensor& out_backprop, const TensorShape& tensor_in_shape, const Tensor* tensor_in, const Tensor* tensor_out, Tensor* input_backprop) { - CHECK((pooling_mode != perftools::gputools::dnn::PoolingMode::kMaximum) || + CHECK((pooling_mode != se::dnn::PoolingMode::kMaximum) || (tensor_in && tensor_out)) << "For MaxPoolGrad, both tensor_in and 
tensor_out needs to be " "specified"; @@ -186,21 +186,21 @@ void DnnPooling3dGradOp::Compute( transformed_output_backprop.tensor()); } - perftools::gputools::dnn::PoolingDescriptor pooling_desc(3); + se::dnn::PoolingDescriptor pooling_desc(3); pooling_desc.set_pooling_mode(pooling_mode); - perftools::gputools::dnn::BatchDescriptor orig_output_desc(3); + se::dnn::BatchDescriptor orig_output_desc(3); orig_output_desc.set_count(in_batch) .set_feature_map_count(in_features) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor orig_input_desc(3); + se::dnn::BatchDescriptor orig_input_desc(3); orig_input_desc.set_count(in_batch) .set_feature_map_count(in_features) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); for (size_t i = 0; i < window.size(); ++i) { - const auto dim_i = static_cast(i); + const auto dim_i = static_cast(i); pooling_desc.set_window(dim_i, window[i]); pooling_desc.set_stride(dim_i, stride[i]); pooling_desc.set_padding(dim_i, padding[i]); diff --git a/tensorflow/core/kernels/cudnn_pooling_gpu.h b/tensorflow/core/kernels/cudnn_pooling_gpu.h index ff4de75845..280d697fc2 100644 --- a/tensorflow/core/kernels/cudnn_pooling_gpu.h +++ b/tensorflow/core/kernels/cudnn_pooling_gpu.h @@ -38,7 +38,7 @@ template class DnnPooling3dOp { public: static void Compute(OpKernelContext* context, - perftools::gputools::dnn::PoolingMode pooling_mode, + se::dnn::PoolingMode pooling_mode, const std::array& size, const std::array& stride, const std::array& padding, @@ -52,7 +52,7 @@ template class DnnPooling3dGradOp { public: static void Compute(OpKernelContext* context, - perftools::gputools::dnn::PoolingMode pooling_mode, + se::dnn::PoolingMode pooling_mode, const std::array& window, const std::array& stride, const std::array& padding, diff --git a/tensorflow/core/kernels/cudnn_rnn_ops.cc 
b/tensorflow/core/kernels/cudnn_rnn_ops.cc index a21f13a4dd..762c2c3666 100644 --- a/tensorflow/core/kernels/cudnn_rnn_ops.cc +++ b/tensorflow/core/kernels/cudnn_rnn_ops.cc @@ -78,7 +78,7 @@ using CPUDevice = Eigen::ThreadPoolDevice; #if GOOGLE_CUDA using GPUDevice = Eigen::GpuDevice; -using ::perftools::gputools::StreamExecutor; +using se::StreamExecutor; template class CudnnRNNParamsSizeOp; @@ -102,21 +102,21 @@ enum class TFRNNInputMode { }; namespace { -using ::perftools::gputools::DeviceMemory; -using ::perftools::gputools::DeviceMemoryBase; -using ::perftools::gputools::ScratchAllocator; -using ::perftools::gputools::Stream; -using ::perftools::gputools::dnn::AlgorithmConfig; -using ::perftools::gputools::dnn::AlgorithmDesc; -using ::perftools::gputools::dnn::ProfileResult; -using ::perftools::gputools::dnn::RnnDescriptor; -using ::perftools::gputools::dnn::RnnDirectionMode; -using ::perftools::gputools::dnn::RnnInputMode; -using ::perftools::gputools::dnn::RnnMode; -using ::perftools::gputools::dnn::RnnSequenceTensorDescriptor; -using ::perftools::gputools::dnn::RnnStateTensorDescriptor; -using ::perftools::gputools::dnn::ToDataType; -using ::perftools::gputools::port::StatusOr; +using se::DeviceMemory; +using se::DeviceMemoryBase; +using se::ScratchAllocator; +using se::Stream; +using se::dnn::AlgorithmConfig; +using se::dnn::AlgorithmDesc; +using se::dnn::ProfileResult; +using se::dnn::RnnDescriptor; +using se::dnn::RnnDirectionMode; +using se::dnn::RnnInputMode; +using se::dnn::RnnMode; +using se::dnn::RnnSequenceTensorDescriptor; +using se::dnn::RnnStateTensorDescriptor; +using se::dnn::ToDataType; +using se::port::StatusOr; Status ParseRNNMode(const string& str, RnnMode* rnn_mode) { if (str == "rnn_relu") { @@ -213,7 +213,7 @@ DeviceMemoryBase SliceDeviceMemory(const DeviceMemoryBase& device_memory, return DeviceMemoryBase(offset_ptr, size); } -inline Status FromExecutorStatus(const perftools::gputools::port::Status& s) { +inline Status 
FromExecutorStatus(const se::port::Status& s) { return s.ok() ? Status::OK() : Status(static_cast( static_cast(s.code())), @@ -221,17 +221,15 @@ inline Status FromExecutorStatus(const perftools::gputools::port::Status& s) { } template -inline Status FromExecutorStatus( - const perftools::gputools::port::StatusOr& s) { +inline Status FromExecutorStatus(const se::port::StatusOr& s) { return FromExecutorStatus(s.status()); } -inline perftools::gputools::port::Status ToExecutorStatus(const Status& s) { - return s.ok() ? perftools::gputools::port::Status::OK() - : perftools::gputools::port::Status( - static_cast( - static_cast(s.code())), - s.error_message()); +inline se::port::Status ToExecutorStatus(const Status& s) { + return s.ok() ? se::port::Status::OK() + : se::port::Status(static_cast( + static_cast(s.code())), + s.error_message()); } template @@ -503,7 +501,7 @@ Status CreateForwardAndBackwardIODescriptors( std::unique_ptr* state_desc, std::unique_ptr* output_desc) { StreamExecutor* executor = context->op_device_context()->stream()->parent(); - ::perftools::gputools::dnn::DataType data_type = ToDataType::value; + se::dnn::DataType data_type = ToDataType::value; const TensorShape& input_shape = model_shapes.input_shape; const TensorShape& hidden_state_shape = model_shapes.hidden_state_shape; @@ -773,7 +771,7 @@ class CudnnRNNKernelCommon : public OpKernel { ScratchAllocator* dropout_state_allocator, std::unique_ptr* rnn_desc) { StreamExecutor* executor = context->op_device_context()->stream()->parent(); - ::perftools::gputools::dnn::DataType data_type = ToDataType::value; + se::dnn::DataType data_type = ToDataType::value; auto rnn_desc_s = executor->createRnnDescriptor( model_shapes.num_layers, model_shapes.num_units, model_shapes.input_size, input_mode, rnn_direction_mode(), rnn_mode(), diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc index 94989089ec..0abd64030f 100644 --- 
a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc @@ -1708,8 +1708,7 @@ void LaunchDepthwiseConvBackpropFilterOp::operator()( // Initialize the results to 0. int num_filter_backprop = args.filter_rows * args.filter_cols * args.out_depth; - perftools::gputools::DeviceMemoryBase filter_bp_ptr(filter_backprop, - num_filter_backprop); + se::DeviceMemoryBase filter_bp_ptr(filter_backprop, num_filter_backprop); stream->ThenMemset32(&filter_bp_ptr, 0, num_filter_backprop * sizeof(T)); if (args.filter_rows == 3 && args.filter_cols == 3) { diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc index 9dfeccff0e..862a97723f 100644 --- a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc +++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc @@ -285,8 +285,8 @@ class DynamicPartitionOpGPU : public AsyncOpKernel { c->allocate_temp(partition_count.dtype(), partition_count.shape(), &cpu_tensor, alloc_attr), done); - perftools::gputools::DeviceMemoryBase wrapped( - partition_count.flat().data(), num_partitions_ * sizeof(int32)); + se::DeviceMemoryBase wrapped(partition_count.flat().data(), + num_partitions_ * sizeof(int32)); const bool status = stream ->ThenMemcpy(cpu_tensor.flat().data(), wrapped, diff --git a/tensorflow/core/kernels/fft_ops.cc b/tensorflow/core/kernels/fft_ops.cc index ab5af8caad..661bf5fc5f 100644 --- a/tensorflow/core/kernels/fft_ops.cc +++ b/tensorflow/core/kernels/fft_ops.cc @@ -277,20 +277,19 @@ REGISTER_KERNEL_BUILDER(Name("IRFFT3D").Device(DEVICE_CPU).Label(FFT_LABEL), #undef FFT_LABEL #if GOOGLE_CUDA -namespace gpu = ::perftools::gputools; namespace { template -gpu::DeviceMemory AsDeviceMemory(const T* cuda_memory) { - gpu::DeviceMemoryBase wrapped(const_cast(cuda_memory)); - gpu::DeviceMemory typed(wrapped); +se::DeviceMemory AsDeviceMemory(const T* cuda_memory) { + se::DeviceMemoryBase 
wrapped(const_cast(cuda_memory)); + se::DeviceMemory typed(wrapped); return typed; } template -gpu::DeviceMemory AsDeviceMemory(const T* cuda_memory, uint64 size) { - gpu::DeviceMemoryBase wrapped(const_cast(cuda_memory), size * sizeof(T)); - gpu::DeviceMemory typed(wrapped); +se::DeviceMemory AsDeviceMemory(const T* cuda_memory, uint64 size) { + se::DeviceMemoryBase wrapped(const_cast(cuda_memory), size * sizeof(T)); + se::DeviceMemory typed(wrapped); return typed; } @@ -299,19 +298,19 @@ gpu::DeviceMemory AsDeviceMemory(const T* cuda_memory, uint64 size) { // the kernel finishes. // TODO(yangzihao): Refactor redundant code in subclasses of ScratchAllocator // into base class. -class CufftScratchAllocator : public gpu::ScratchAllocator { +class CufftScratchAllocator : public se::ScratchAllocator { public: ~CufftScratchAllocator() override {} CufftScratchAllocator(int64 memory_limit, OpKernelContext* context) : memory_limit_(memory_limit), total_byte_size_(0), context_(context) {} - int64 GetMemoryLimitInBytes(gpu::Stream* stream) override { + int64 GetMemoryLimitInBytes(se::Stream* stream) override { return memory_limit_; } - gpu::port::StatusOr> AllocateBytes( - gpu::Stream* stream, int64 byte_size) override { + se::port::StatusOr> AllocateBytes( + se::Stream* stream, int64 byte_size) override { Tensor temporary_memory; if (byte_size > memory_limit_) { - return gpu::port::StatusOr>(); + return se::port::StatusOr>(); } AllocationAttributes allocation_attr; allocation_attr.no_retry_on_failure = true; @@ -319,13 +318,13 @@ class CufftScratchAllocator : public gpu::ScratchAllocator { DT_UINT8, TensorShape({byte_size}), &temporary_memory, AllocatorAttributes(), allocation_attr)); if (!allocation_status.ok()) { - return gpu::port::StatusOr>(); + return se::port::StatusOr>(); } // Hold the reference of the allocated tensors until the end of the // allocator. 
allocated_tensors_.push_back(temporary_memory); total_byte_size_ += byte_size; - return gpu::port::StatusOr>( + return se::port::StatusOr>( AsDeviceMemory(temporary_memory.flat().data(), temporary_memory.flat().size())); } @@ -394,9 +393,9 @@ class FFTGPUBase : public FFTBase { constexpr bool kInPlaceFft = false; const auto kFftType = - IsReal() ? (IsForward() ? gpu::fft::Type::kR2C : gpu::fft::Type::kC2R) - : (IsForward() ? gpu::fft::Type::kC2CForward - : gpu::fft::Type::kC2CInverse); + IsReal() ? (IsForward() ? se::fft::Type::kR2C : se::fft::Type::kC2R) + : (IsForward() ? se::fft::Type::kC2CForward + : se::fft::Type::kC2CInverse); CufftScratchAllocator scratch_allocator(CufftScratchSize, ctx); auto plan = diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc index 9b4dca8511..f99dd643f7 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op.cc @@ -251,7 +251,7 @@ struct FusedBatchNorm { Tensor x_maybe_transformed = x; Tensor x_transformed; Tensor y_transformed; - perftools::gputools::DeviceMemory y_ptr; + se::DeviceMemory y_ptr; if (tensor_format == FORMAT_NCHW) { y_ptr = StreamExecutorUtil::AsDeviceMemory(*y); @@ -279,19 +279,19 @@ struct FusedBatchNorm { return; } - perftools::gputools::dnn::BatchDescriptor x_desc; + se::dnn::BatchDescriptor x_desc; x_desc.set_count(batch_size) .set_feature_map_count(channels) .set_height(height) .set_width(width) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor scale_offset_desc; + se::dnn::BatchDescriptor scale_offset_desc; scale_offset_desc.set_count(1) .set_feature_map_count(channels) .set_height(1) .set_width(1) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); auto x_ptr = StreamExecutorUtil::AsDeviceMemory(x_maybe_transformed); auto 
scale_ptr = StreamExecutorUtil::AsDeviceMemory(scale); @@ -308,7 +308,7 @@ struct FusedBatchNorm { StreamExecutorUtil::AsDeviceMemory(*saved_inv_var); GPUDevice d = context->eigen_device(); - using perftools::gputools::DeviceMemory; + using se::DeviceMemory; Tensor inv_var; OP_REQUIRES_OK( context, context->allocate_temp(DataTypeToEnum::value, @@ -390,7 +390,7 @@ struct FusedBatchNormGrad { // Outputs Tensor x_backprop_transformed; - perftools::gputools::DeviceMemory x_backprop_ptr; + se::DeviceMemory x_backprop_ptr; if (tensor_format == FORMAT_NCHW) { x_backprop_ptr = StreamExecutorUtil::AsDeviceMemory(*x_backprop); @@ -433,19 +433,19 @@ struct FusedBatchNormGrad { return; } - perftools::gputools::dnn::BatchDescriptor x_desc; + se::dnn::BatchDescriptor x_desc; x_desc.set_count(batch_size) .set_feature_map_count(channels) .set_height(height) .set_width(width) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor scale_offset_desc; + se::dnn::BatchDescriptor scale_offset_desc; scale_offset_desc.set_count(1) .set_feature_map_count(channels) .set_height(1) .set_width(1) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); auto y_backprop_ptr = StreamExecutorUtil::AsDeviceMemory(y_backprop_maybe_transformed); diff --git a/tensorflow/core/kernels/gpu_utils.h b/tensorflow/core/kernels/gpu_utils.h index ffc733e6bb..2f64619afc 100644 --- a/tensorflow/core/kernels/gpu_utils.h +++ b/tensorflow/core/kernels/gpu_utils.h @@ -29,11 +29,9 @@ limitations under the License. 
namespace tensorflow { template -inline perftools::gputools::DeviceMemory AsDeviceMemory(const T* cuda_memory, - uint64 size) { - perftools::gputools::DeviceMemoryBase wrapped(const_cast(cuda_memory), - size * sizeof(T)); - perftools::gputools::DeviceMemory typed(wrapped); +inline se::DeviceMemory AsDeviceMemory(const T* cuda_memory, uint64 size) { + se::DeviceMemoryBase wrapped(const_cast(cuda_memory), size * sizeof(T)); + se::DeviceMemory typed(wrapped); return typed; } diff --git a/tensorflow/core/kernels/lrn_op.cc b/tensorflow/core/kernels/lrn_op.cc index c3a59c9576..b4252eb044 100644 --- a/tensorflow/core/kernels/lrn_op.cc +++ b/tensorflow/core/kernels/lrn_op.cc @@ -187,14 +187,14 @@ struct LaunchLRN { const int cols = static_cast(in.dim_size(2)); const int depth = static_cast(in.dim_size(3)); - perftools::gputools::dnn::BatchDescriptor dimensions_desc; + se::dnn::BatchDescriptor dimensions_desc; dimensions_desc.set_count(batch) .set_height(rows) .set_width(cols) .set_feature_map_count(depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchYXDepth); + .set_layout(se::dnn::DataLayout::kBatchYXDepth); - perftools::gputools::dnn::NormalizeDescriptor normalize_desc; + se::dnn::NormalizeDescriptor normalize_desc; normalize_desc.set_bias(bias_) .set_range(depth_radius_) .set_alpha(alpha_) @@ -404,14 +404,14 @@ struct LaunchLRNGrad { const int64 cols = in_grads.dim_size(2); const int64 depth = in_grads.dim_size(3); - perftools::gputools::dnn::BatchDescriptor dimensions_desc; + se::dnn::BatchDescriptor dimensions_desc; dimensions_desc.set_count(batch) .set_height(rows) .set_width(cols) .set_feature_map_count(depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchYXDepth); + .set_layout(se::dnn::DataLayout::kBatchYXDepth); - perftools::gputools::dnn::NormalizeDescriptor normalize_desc; + se::dnn::NormalizeDescriptor normalize_desc; normalize_desc.set_bias(bias_) .set_range(depth_radius_) .set_alpha(alpha_) diff --git 
a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc index f499ce6519..3664f95c3b 100644 --- a/tensorflow/core/kernels/matmul_op.cc +++ b/tensorflow/core/kernels/matmul_op.cc @@ -112,7 +112,7 @@ bool ExplicitVectorMatrixOptimization( template struct LaunchMatMulBase { #if GOOGLE_CUDA - typedef perftools::gputools::blas::AlgorithmType AlgorithmType; + typedef se::blas::AlgorithmType AlgorithmType; #else typedef int64 AlgorithmType; #endif // GOOGLE_CUDA @@ -160,15 +160,12 @@ namespace { template struct LaunchBlasGemv { - static void Compute( - OpKernelContext* ctx, perftools::gputools::Stream* stream, bool trans, - uint64 m, uint64 n, const perftools::gputools::DeviceMemory& a, - const perftools::gputools::DeviceMemory& b, - perftools::gputools::DeviceMemory* c, - perftools::gputools::blas::ProfileResult* output_profile) { - const auto blas_trans = - trans ? perftools::gputools::blas::Transpose::kTranspose - : perftools::gputools::blas::Transpose::kNoTranspose; + static void Compute(OpKernelContext* ctx, se::Stream* stream, bool trans, + uint64 m, uint64 n, const se::DeviceMemory& a, + const se::DeviceMemory& b, se::DeviceMemory* c, + se::blas::ProfileResult* output_profile) { + const auto blas_trans = trans ? 
se::blas::Transpose::kTranspose + : se::blas::Transpose::kNoTranspose; if (output_profile == nullptr) { bool blas_launch_status = stream @@ -198,11 +195,10 @@ struct LaunchBlasGemv { template <> void LaunchBlasGemv::Compute( - OpKernelContext* ctx, perftools::gputools::Stream* stream, bool trans, - uint64 m, uint64 n, const perftools::gputools::DeviceMemory& a, - const perftools::gputools::DeviceMemory& b, - perftools::gputools::DeviceMemory* c, - perftools::gputools::blas::ProfileResult* output_profile) { + OpKernelContext* ctx, se::Stream* stream, bool trans, uint64 m, uint64 n, + const se::DeviceMemory& a, + const se::DeviceMemory& b, se::DeviceMemory* c, + se::blas::ProfileResult* output_profile) { ctx->SetStatus(errors::Internal( "Blas GEMV launch failed: GEMV is not implemented for float16.")); } @@ -219,10 +215,9 @@ bool ShouldUseGemv(uint64 n) { } // namespace -bool GetCublasAutotuneComputationType( - const DataType& dtype, - perftools::gputools::blas::ComputationType* compute_type) { - using perftools::gputools::blas::ComputationType; +bool GetCublasAutotuneComputationType(const DataType& dtype, + se::blas::ComputationType* compute_type) { + using se::blas::ComputationType; bool use_f32_for_f16_computation = MatmulDoFP32ComputationFP16Input(); switch (dtype) { case DT_HALF: @@ -250,7 +245,7 @@ struct MatmulAutoTuneGroup { static string name() { return "Matmul"; } }; typedef AutoTuneSingleton + se::blas::AlgorithmConfig> AutoTuneMatmul; template @@ -259,14 +254,14 @@ struct LaunchMatMul { OpKernelContext* ctx, const Tensor& a, const Tensor& b, const Eigen::array, 1>& dim_pair, std::vector* algorithms, bool use_autotune, Tensor* out) { - using perftools::gputools::blas::AlgorithmConfig; - using perftools::gputools::blas::ComputationType; - using perftools::gputools::blas::kDefaultAlgorithm; - using perftools::gputools::blas::kDefaultBlasGemm; - using perftools::gputools::blas::kDefaultBlasGemv; - using perftools::gputools::blas::kNoAlgorithm; - using 
perftools::gputools::blas::ProfileResult; - using perftools::gputools::blas::Transpose; + using se::blas::AlgorithmConfig; + using se::blas::ComputationType; + using se::blas::kDefaultAlgorithm; + using se::blas::kDefaultBlasGemm; + using se::blas::kDefaultBlasGemv; + using se::blas::kNoAlgorithm; + using se::blas::ProfileResult; + using se::blas::Transpose; Transpose trans[] = {Transpose::kNoTranspose, Transpose::kTranspose}; const uint64 m = a.dim_size(1 - dim_pair[0].first); const uint64 k = a.dim_size(dim_pair[0].first); diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op.cc b/tensorflow/core/kernels/matrix_triangular_solve_op.cc index 6f7e6a7496..5de0d1118a 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op.cc +++ b/tensorflow/core/kernels/matrix_triangular_solve_op.cc @@ -34,11 +34,9 @@ namespace tensorflow { #if GOOGLE_CUDA namespace { template -perftools::gputools::DeviceMemory AsDeviceMemory( - const Scalar* cuda_memory) { - perftools::gputools::DeviceMemoryBase wrapped( - const_cast(cuda_memory)); - perftools::gputools::DeviceMemory typed(wrapped); +se::DeviceMemory AsDeviceMemory(const Scalar* cuda_memory) { + se::DeviceMemoryBase wrapped(const_cast(cuda_memory)); + se::DeviceMemory typed(wrapped); return typed; } } // namespace @@ -204,18 +202,17 @@ class MatrixTriangularSolveOpGPU : public LinearAlgebraOp { // output' = rhs' / matrix' (' stands for transpose) // Upper/lower needs to be swapped for this. 
- perftools::gputools::blas::UpperLower upper_lower_matrix; - perftools::gputools::blas::Transpose transpose_matrix; + se::blas::UpperLower upper_lower_matrix; + se::blas::Transpose transpose_matrix; if (lower_) { - upper_lower_matrix = perftools::gputools::blas::UpperLower::kUpper; + upper_lower_matrix = se::blas::UpperLower::kUpper; } else { - upper_lower_matrix = perftools::gputools::blas::UpperLower::kLower; + upper_lower_matrix = se::blas::UpperLower::kLower; } if (adjoint_) { - transpose_matrix = - perftools::gputools::blas::Transpose::kConjugateTranspose; + transpose_matrix = se::blas::Transpose::kConjugateTranspose; } else { - transpose_matrix = perftools::gputools::blas::Transpose::kNoTranspose; + transpose_matrix = se::blas::Transpose::kNoTranspose; } uint64 leading_dim_matrix = matrix.cols(); uint64 leading_dim_output = output.cols(); @@ -224,11 +221,11 @@ class MatrixTriangularSolveOpGPU : public LinearAlgebraOp { bool blas_launch_status = stream ->ThenBlasTrsm( - perftools::gputools::blas::Side::kRight /*side*/, - upper_lower_matrix /*uplo*/, transpose_matrix /*trans*/, - perftools::gputools::blas::Diagonal::kNonUnit /*diag*/, - colmajor_rows /*m*/, colmajor_cols /*n*/, Scalar(1.0) /*alpha*/, - matrix_ptr, leading_dim_matrix /*lda*/, &out_ptr, + se::blas::Side::kRight /*side*/, upper_lower_matrix /*uplo*/, + transpose_matrix /*trans*/, + se::blas::Diagonal::kNonUnit /*diag*/, colmajor_rows /*m*/, + colmajor_cols /*n*/, Scalar(1.0) /*alpha*/, matrix_ptr, + leading_dim_matrix /*lda*/, &out_ptr, leading_dim_output /*ldb*/) .ok(); if (!blas_launch_status) { diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index aaaf45d3e7..507fc99837 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -404,10 +404,10 @@ class MaxPoolingGradOp : public OpKernel { "Pooling is not yet supported on the batch dimension.")); if (use_dnn_) { - DnnPoolingGradOp::Compute( - context, 
perftools::gputools::dnn::PoolingMode::kMaximum, ksize, - stride, padding_, data_format_, &tensor_in, &tensor_out, out_backprop, - output_shape, propagate_nans_); + DnnPoolingGradOp::Compute(context, se::dnn::PoolingMode::kMaximum, + ksize, stride, padding_, data_format_, + &tensor_in, &tensor_out, out_backprop, + output_shape, propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) << "Non-Cudnn MaxPoolGrad only supports NHWC format"; @@ -1136,10 +1136,9 @@ class MaxPoolingNoMaskOp : public OpKernel { // These is_int8x4 checks avoid linker errors for missing qint8 kernels. if (!is_int8x4 && use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute(context, - perftools::gputools::dnn::PoolingMode::kMaximum, - ksize_, stride_, padding_, data_format_, - tensor_in, out_shape, propagate_nans_); + DnnPoolingOp::Compute(context, se::dnn::PoolingMode::kMaximum, ksize_, + stride_, padding_, data_format_, tensor_in, + out_shape, propagate_nans_); } else { Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); @@ -1240,9 +1239,8 @@ class MaxPoolingNoMaskV2Op : public OpKernel { ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height, params.out_width, params.depth); if (use_dnn_ && data_format_ == FORMAT_NCHW) { - DnnPoolingOp::Compute(context, - perftools::gputools::dnn::PoolingMode::kMaximum, - ksize, stride, padding_, data_format_, tensor_in, + DnnPoolingOp::Compute(context, se::dnn::PoolingMode::kMaximum, ksize, + stride, padding_, data_format_, tensor_in, out_shape, propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) diff --git a/tensorflow/core/kernels/pooling_ops_3d.cc b/tensorflow/core/kernels/pooling_ops_3d.cc index 01bcfede1e..2180c4eb97 100644 --- a/tensorflow/core/kernels/pooling_ops_3d.cc +++ b/tensorflow/core/kernels/pooling_ops_3d.cc @@ -748,9 +748,8 @@ struct LaunchPoolingOp { const std::array& padding, TensorFormat data_format, Padding padding_type, Tensor* output) { - 
DnnPooling3dOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kAverage, window, - stride, padding, data_format, tensor_in, output); + DnnPooling3dOp::Compute(context, se::dnn::PoolingMode::kAverage, window, + stride, padding, data_format, tensor_in, output); } }; @@ -762,9 +761,8 @@ struct LaunchPoolingOp { const std::array& padding, TensorFormat data_format, Padding padding_type, Tensor* output) { - DnnPooling3dOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kMaximum, window, - stride, padding, data_format, tensor_in, output); + DnnPooling3dOp::Compute(context, se::dnn::PoolingMode::kMaximum, window, + stride, padding, data_format, tensor_in, output); } }; @@ -778,10 +776,10 @@ struct LaunchMaxPooling3dGradOp { const std::array& padding, TensorFormat data_format, Tensor* input_backprop) { const TensorShape output_shape = tensor_in.shape(); - DnnPooling3dGradOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kMaximum, window, - stride, padding, out, data_format, out_backprop, output_shape, - &tensor_in, &tensor_out, input_backprop); + DnnPooling3dGradOp::Compute(context, se::dnn::PoolingMode::kMaximum, + window, stride, padding, out, data_format, + out_backprop, output_shape, &tensor_in, + &tensor_out, input_backprop); } }; @@ -796,9 +794,8 @@ struct LaunchAvgPooling3dGradOp { const std::array& padding, TensorFormat data_format, Tensor* output) { DnnPooling3dGradOp::Compute( - context, perftools::gputools::dnn::PoolingMode::kAverage, window, - stride, padding, out, data_format, out_backprop, tensor_in_shape, - nullptr, nullptr, output); + context, se::dnn::PoolingMode::kAverage, window, stride, padding, out, + data_format, out_backprop, tensor_in_shape, nullptr, nullptr, output); } }; diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc index d4241b5809..e583f7feb4 100644 --- a/tensorflow/core/kernels/pooling_ops_common.cc +++ 
b/tensorflow/core/kernels/pooling_ops_common.cc @@ -114,11 +114,9 @@ TensorShape PoolParameters::forward_output_shape() { namespace { template -perftools::gputools::DeviceMemory AsDeviceMemory(const T* cuda_memory, - uint64 size) { - perftools::gputools::DeviceMemoryBase wrapped(const_cast(cuda_memory), - size * sizeof(T)); - perftools::gputools::DeviceMemory typed(wrapped); +se::DeviceMemory AsDeviceMemory(const T* cuda_memory, uint64 size) { + se::DeviceMemoryBase wrapped(const_cast(cuda_memory), size * sizeof(T)); + se::DeviceMemory typed(wrapped); return typed; } } // namespace @@ -138,12 +136,13 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC) } // namespace functor template -void DnnPoolingOp::Compute( - OpKernelContext* context, - perftools::gputools::dnn::PoolingMode pooling_mode, - const std::vector& size, const std::vector& stride, - Padding padding, TensorFormat data_format, const Tensor& tensor_in, - const TensorShape& tensor_out_shape, bool propagate_nans) { +void DnnPoolingOp::Compute(OpKernelContext* context, + se::dnn::PoolingMode pooling_mode, + const std::vector& size, + const std::vector& stride, Padding padding, + TensorFormat data_format, const Tensor& tensor_in, + const TensorShape& tensor_out_shape, + bool propagate_nans) { Tensor* tensor_out = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, tensor_out_shape, &tensor_out)); @@ -184,7 +183,7 @@ void DnnPoolingOp::Compute( } /// Get ready to call cudnn - perftools::gputools::dnn::PoolingDescriptor pooling_desc; + se::dnn::PoolingDescriptor pooling_desc; pooling_desc.set_pooling_mode(pooling_mode) .set_window_height(params.window_rows) .set_window_width(params.window_cols) @@ -194,19 +193,19 @@ void DnnPoolingOp::Compute( .set_horizontal_padding(params.pad_cols) .set_propagate_nans(propagate_nans); - perftools::gputools::dnn::BatchDescriptor input_desc; + se::dnn::BatchDescriptor input_desc; input_desc.set_count(params.tensor_in_batch) .set_height(params.tensor_in_rows) 
.set_width(params.tensor_in_cols) .set_feature_map_count(params.depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor output_desc; + se::dnn::BatchDescriptor output_desc; output_desc.set_count(params.tensor_in_batch) .set_height(params.out_height) .set_width(params.out_width) .set_feature_map_count(params.depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); auto input_data = AsDeviceMemory(transformed_input.template flat().data(), transformed_input.template flat().size()); @@ -236,13 +235,12 @@ void DnnPoolingOp::Compute( template void DnnPoolingGradOp::Compute( - OpKernelContext* context, - perftools::gputools::dnn::PoolingMode pooling_mode, + OpKernelContext* context, se::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, const TensorShape& tensor_in_shape, bool propagate_nans) { - CHECK((pooling_mode != perftools::gputools::dnn::PoolingMode::kMaximum) || + CHECK((pooling_mode != se::dnn::PoolingMode::kMaximum) || (tensor_in && tensor_out)) << "For MaxPoolGrad, both tensor_in and tensor_out needs to be " "specified"; @@ -327,7 +325,7 @@ void DnnPoolingGradOp::Compute( } /// Get ready to call cudnn - perftools::gputools::dnn::PoolingDescriptor pooling_desc; + se::dnn::PoolingDescriptor pooling_desc; pooling_desc.set_pooling_mode(pooling_mode) .set_window_height(params.window_rows) .set_window_width(params.window_cols) @@ -337,19 +335,19 @@ void DnnPoolingGradOp::Compute( .set_horizontal_padding(params.pad_cols) .set_propagate_nans(propagate_nans); - perftools::gputools::dnn::BatchDescriptor orig_output_desc; + se::dnn::BatchDescriptor orig_output_desc; orig_output_desc.set_count(params.tensor_in_batch) 
.set_height(params.out_height) .set_width(params.out_width) .set_feature_map_count(params.depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor orig_input_desc; + se::dnn::BatchDescriptor orig_input_desc; orig_input_desc.set_count(params.tensor_in_batch) .set_height(params.tensor_in_rows) .set_width(params.tensor_in_cols) .set_feature_map_count(params.depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + .set_layout(se::dnn::DataLayout::kBatchDepthYX); auto orig_output_data = AsDeviceMemory(transformed_output.template flat().data(), diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h index 1458456585..7362c5275f 100644 --- a/tensorflow/core/kernels/pooling_ops_common_gpu.h +++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h @@ -40,7 +40,7 @@ class DnnPoolingOp { public: typedef GPUDevice Device; static void Compute(OpKernelContext* context, - perftools::gputools::dnn::PoolingMode pooling_mode, + se::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor& tensor_in, @@ -55,7 +55,7 @@ class DnnPoolingGradOp { public: typedef GPUDevice Device; static void Compute(OpKernelContext* context, - perftools::gputools::dnn::PoolingMode pooling_mode, + se::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, Padding padding, TensorFormat data_format, const Tensor* tensor_in, diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc index 2fc73a3309..c87ce78e05 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.cc +++ b/tensorflow/core/kernels/segment_reduction_ops.cc @@ -40,7 +40,7 @@ limitations under the License. 
#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/platform/cuda.h" -using ::perftools::gputools::cuda::ScopedActivateExecutorContext; +using stream_executor::cuda::ScopedActivateExecutorContext; #endif // GOOGLE_CUDA namespace tensorflow { @@ -242,7 +242,7 @@ class SegmentSumGPUOp : public AsyncOpKernel { return; } - perftools::gputools::DeviceMemoryBase output_rows_device( + se::DeviceMemoryBase output_rows_device( const_cast(segment_ids).template flat().data() + (num_indices - 1)); ScratchSpace output_rows_host(context, 1, /* on_host */ true); diff --git a/tensorflow/core/kernels/where_op.cc b/tensorflow/core/kernels/where_op.cc index f92c4ed17a..3330442ffd 100644 --- a/tensorflow/core/kernels/where_op.cc +++ b/tensorflow/core/kernels/where_op.cc @@ -42,7 +42,7 @@ limitations under the License. #include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/platform/cuda.h" -using ::perftools::gputools::cuda::ScopedActivateExecutorContext; +using stream_executor::cuda::ScopedActivateExecutorContext; #endif // GOOGLE_CUDA namespace tensorflow { @@ -278,8 +278,7 @@ class WhereGPUOp : public AsyncOpKernel { auto num_true_t = num_true.scalar(); - perftools::gputools::DeviceMemoryBase num_true_ptr( - static_cast(num_true_t.data())); + se::DeviceMemoryBase num_true_ptr(static_cast(num_true_t.data())); // Push kernel to stream to get number of true elements. 
const GPUDevice& d = context->eigen_device(); Status s = functor::NumTrue::Compute( diff --git a/tensorflow/core/platform/default/gpu/cupti_wrapper.cc b/tensorflow/core/platform/default/gpu/cupti_wrapper.cc index 580db4844f..7ac5e5c445 100644 --- a/tensorflow/core/platform/default/gpu/cupti_wrapper.cc +++ b/tensorflow/core/platform/default/gpu/cupti_wrapper.cc @@ -28,27 +28,27 @@ namespace profiler { namespace dynload { -#define LIBCUPTI_WRAP(__name) \ - struct DynLoadShim__##__name { \ - static const char* kName; \ - using FuncPointerT = std::add_pointer::type; \ - static void* GetDsoHandle() { \ - static auto status = perftools::gputools::internal::CachedDsoLoader:: \ - GetLibcuptiDsoHandle(); \ - return status.ValueOrDie(); \ - } \ - static FuncPointerT DynLoad() { \ - static void* f; \ - TF_CHECK_OK(::tensorflow::Env::Default()->GetSymbolFromLibrary( \ - GetDsoHandle(), kName, &f)) \ - << "could not find " << kName << "in libcupti DSO"; \ - return reinterpret_cast(f); \ - } \ - template \ - CUptiResult operator()(Args... args) { \ - return DynLoad()(args...); \ - } \ - } __name; \ +#define LIBCUPTI_WRAP(__name) \ + struct DynLoadShim__##__name { \ + static const char* kName; \ + using FuncPointerT = std::add_pointer::type; \ + static void* GetDsoHandle() { \ + static auto status = \ + stream_executor::internal::CachedDsoLoader::GetLibcuptiDsoHandle(); \ + return status.ValueOrDie(); \ + } \ + static FuncPointerT DynLoad() { \ + static void* f; \ + TF_CHECK_OK(::tensorflow::Env::Default()->GetSymbolFromLibrary( \ + GetDsoHandle(), kName, &f)) \ + << "could not find " << kName << "in libcupti DSO"; \ + return reinterpret_cast(f); \ + } \ + template \ + CUptiResult operator()(Args... 
args) { \ + return DynLoad()(args...); \ + } \ + } __name; \ const char* DynLoadShim__##__name::kName = #__name; LIBCUPTI_WRAP(cuptiActivityDisable); diff --git a/tensorflow/core/platform/types.h b/tensorflow/core/platform/types.h index f2471712cc..68897ac423 100644 --- a/tensorflow/core/platform/types.h +++ b/tensorflow/core/platform/types.h @@ -63,9 +63,7 @@ typedef uint64 Fprint; // Alias namespace ::stream_executor as ::tensorflow::se. namespace stream_executor {} namespace tensorflow { -// TODO(b/77980417): Uncomment this once all namespace aliases named 'se' are -// removed in ::xla. -// namespace se = ::stream_executor; +namespace se = ::stream_executor; } // namespace tensorflow #endif // TENSORFLOW_PLATFORM_TYPES_H_ -- GitLab From 7bee86727b87a8317d4f1407061edfa9ccb16ea5 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Mon, 23 Apr 2018 19:35:12 -0700 Subject: [PATCH 183/434] Don't Ref() XlaDeviceContext unnecessarily. PiperOrigin-RevId: 194024407 --- tensorflow/compiler/jit/xla_device.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index 7beb18c04d..3e27cd39c6 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -234,7 +234,6 @@ Status XlaDevice::CreateAndSetGpuDeviceInfo() { gpu_device_info_->stream = stream; gpu_device_info_->default_context = new XlaDeviceContext(stream, client(), transfer_as_literal_); - gpu_device_info_->default_context->Ref(); set_tensorflow_gpu_device_info(gpu_device_info_.get()); } -- GitLab From 3f7c9265b59cae306d029dfac76e25badd20def8 Mon Sep 17 00:00:00 2001 From: Sung Jin Hwang Date: Mon, 23 Apr 2018 19:35:19 -0700 Subject: [PATCH 184/434] Add missing pmf_to_cdf_op.cc in the source list in cmake. Also split range_coder_ops.cc and range_coder_ops_util.cc into separate targets so that dependence to range_coder_ops_util.cc does not register kernels again. 
PiperOrigin-RevId: 194024410 --- tensorflow/contrib/coder/BUILD | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/coder/BUILD b/tensorflow/contrib/coder/BUILD index a146460a9c..a2c6e41303 100644 --- a/tensorflow/contrib/coder/BUILD +++ b/tensorflow/contrib/coder/BUILD @@ -54,19 +54,27 @@ tf_gen_op_libs( ], ) +cc_library( + name = "range_coder_ops_util", + srcs = ["kernels/range_coder_ops_util.cc"], + hdrs = ["kernels/range_coder_ops_util.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + tf_kernel_library( name = "range_coder_ops", srcs = [ "kernels/range_coder_ops.cc", - "kernels/range_coder_ops_util.cc", - ], - hdrs = [ - "kernels/range_coder_ops_util.h", ], visibility = ["//visibility:public"], deps = [ ":coder_ops_op_lib", ":range_coder", + ":range_coder_ops_util", "//tensorflow/core:framework", "//tensorflow/core:lib", ], -- GitLab From 24b7c9a800ab5086d45a7d83ebcd6218424dc9e3 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Mon, 23 Apr 2018 20:15:30 -0700 Subject: [PATCH 185/434] Make all_reduce._split_by_task function able to deal with different jobs. 
PiperOrigin-RevId: 194027134 --- .../contrib/all_reduce/python/all_reduce.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/all_reduce/python/all_reduce.py b/tensorflow/contrib/all_reduce/python/all_reduce.py index 8add2aacff..159d985db5 100644 --- a/tensorflow/contrib/all_reduce/python/all_reduce.py +++ b/tensorflow/contrib/all_reduce/python/all_reduce.py @@ -18,10 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import math -import re from tensorflow.contrib import nccl +from tensorflow.python.framework import device as device_lib from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -659,21 +660,20 @@ def _split_by_task(devices, values): num_devices = len(devices) if num_devices != len(values): raise ValueError("len(devices) must equal len(values)") - pattern = re.compile(r"/task:(\d+)/") - per_task_devices = [] - per_task_values = [] + per_task_devices = collections.OrderedDict() + per_task_values = collections.OrderedDict() for d in range(num_devices): - m = pattern.search(devices[d]) - if m: - index = int(m.group(1)) - while index >= len(per_task_devices): - per_task_devices.append([]) - per_task_values.append([]) - per_task_devices[index].append(devices[d]) - per_task_values[index].append(values[d]) - else: + d_spec = device_lib.DeviceSpec.from_string(devices[d]) + if not hasattr(d_spec, "task") or d_spec.task is None: assert False, "failed to parse device %s" % devices[d] - return (per_task_devices, per_task_values) + index = (d_spec.job or "localhost", d_spec.replica or 0, d_spec.task) + if index not in per_task_devices: + per_task_devices[index] = [] + per_task_values[index] = [] + per_task_devices[index].append(devices[d]) + per_task_values[index].append(values[d]) + + return (list(per_task_devices.values()), 
list(per_task_values.values())) def build_nccl_all_reduce(input_tensors, red_op, un_op=None): -- GitLab From 22f3a97b8b089202f60bb0c7697feb0c8e0713cc Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Mon, 23 Apr 2018 21:19:14 -0700 Subject: [PATCH 186/434] Merge changes from github. PiperOrigin-RevId: 194031845 --- CODEOWNERS | 2 +- README.md | 2 +- RELEASE.md | 58 +++ WORKSPACE | 8 +- tensorflow/c/c_api.h | 4 +- tensorflow/c/c_api_experimental.cc | 12 + tensorflow/c/c_api_experimental.h | 4 +- tensorflow/c/eager/c_api.h | 4 +- tensorflow/compiler/aot/runtime.cc | 4 +- tensorflow/compiler/tests/binary_ops_test.py | 12 +- .../compiler/xla/python/xla_client_test.py | 1 - .../gpu/cudnn_convolution_algorithm_picker.cc | 4 +- .../compiler/xla/tests/dot_operation_test.cc | 7 + .../autograph/converters/call_trees.py | 2 +- .../autograph/converters/call_trees_test.py | 2 +- .../autograph/converters/decorators_test.py | 2 +- tensorflow/contrib/autograph/impl/api.py | 4 +- .../contrib/autograph/impl/conversion.py | 2 +- .../pyct/static_analysis/activity.py | 6 +- .../pyct/static_analysis/activity_test.py | 2 +- .../autograph/pyct/static_analysis/annos.py | 8 +- .../contrib/autograph/utils/builtins.py | 2 +- .../bayesflow/python/ops/monte_carlo_impl.py | 26 +- .../training/functions/gbdt_batch_test.py | 2 +- .../python/split_dependency_test.py | 2 +- tensorflow/contrib/cmake/CMakeLists.txt | 70 +++- tensorflow/contrib/cmake/README.md | 28 ++ .../contrib/cmake/external/gemmlowp.cmake | 4 +- .../contrib/cmake/external/mkldnn.cmake | 44 +++ tensorflow/contrib/cmake/external/png.cmake | 19 +- .../contrib/cmake/external/sqlite.cmake | 4 +- .../contrib/cmake/tf_core_framework.cmake | 8 +- tensorflow/contrib/cmake/tf_python.cmake | 9 +- tensorflow/contrib/cmake/tf_shared_lib.cmake | 3 +- .../contrib/cmake/tf_stream_executor.cmake | 6 + .../contrib/cmake/tools/create_def_file.py | 8 +- .../crf/python/kernel_tests/crf_test.py | 15 + tensorflow/contrib/crf/python/ops/crf.py | 8 +- 
.../cudnn_rnn/python/layers/cudnn_rnn.py | 3 +- .../contrib/data/python/kernel_tests/BUILD | 7 +- .../dataset_serialization_test_base.py | 2 +- .../interleave_dataset_op_test.py | 63 ++-- .../kernel_tests/stats_dataset_ops_test.py | 16 + .../contrib/data/python/ops/interleave_ops.py | 26 +- .../data/python/ops/prefetching_ops.py | 6 +- .../contrib/data/python/ops/scan_ops.py | 2 +- .../python/kernel_tests/shape_test.py | 1 - tensorflow/contrib/eager/python/saver_test.py | 1 - .../estimator/python/estimator/head.py | 2 +- .../python/estimator/replicate_model_fn.py | 4 +- .../factorization/python/ops/gmm_ops.py | 12 +- .../factorization/python/ops/kmeans.py | 4 +- tensorflow/contrib/framework/__init__.py | 3 +- .../python/framework/tensor_util_test.py | 2 +- .../ops/fused_conv2d_bias_activation_op.py | 2 +- .../fused_conv2d_bias_activation_op_test.py | 10 +- .../eval/python/sliced_wasserstein_impl.py | 2 +- .../features/python/virtual_batchnorm_impl.py | 6 +- tensorflow/contrib/hvx/README.md | 3 +- .../kernels/adjust_hsv_in_yiq_op_gpu.cu.cc | 2 +- .../contrib/image/ops/distort_image_ops.cc | 4 +- tensorflow/contrib/image/ops/image_ops.cc | 2 +- ...single_image_random_dot_stereograms_ops.cc | 4 +- .../contrib/image/python/ops/image_ops.py | 2 +- .../single_image_random_dot_stereograms.py | 2 +- .../contrib/kfac/python/ops/loss_functions.py | 6 +- .../kfac/python/ops/loss_functions_lib.py | 1 - .../labeled_tensor/python/ops/ops_test.py | 4 +- .../sparse_feature_cross_op_test.py | 2 +- .../layers/python/layers/feature_column.py | 2 +- .../python/layers/feature_column_ops.py | 4 +- .../contrib/layers/python/layers/layers.py | 142 ++++++- .../layers/python/layers/layers_test.py | 15 +- .../python/layers/rev_block_lib_test.py | 4 +- .../layers/python/layers/utils_test.py | 1 - .../python/learn/estimators/kmeans_test.py | 1 - .../python/learn/estimators/run_config.py | 1 + tensorflow/contrib/lite/Makefile | 3 +- .../contrib/lite/download_dependencies.sh | 6 +- 
.../project.pbxproj | 8 - tensorflow/contrib/lite/g3doc/apis.md | 2 +- .../Camera2BasicFragment.java | 23 ++ .../tflitecamerademo/ImageClassifier.java | 10 + .../res/layout/fragment_camera2_basic.xml | 41 ++- .../demo/app/src/main/res/values/strings.xml | 2 + .../java/org/tensorflow/lite/Interpreter.java | 7 + .../lite/NativeInterpreterWrapper.java | 6 + .../native/nativeinterpreterwrapper_jni.cc | 10 + .../native/nativeinterpreterwrapper_jni.h | 12 +- tensorflow/contrib/lite/kernels/add.cc | 2 +- tensorflow/contrib/lite/kernels/div.cc | 5 +- .../internal/optimized/optimized_ops.h | 2 +- .../internal/reference/reference_ops.h | 39 +- tensorflow/contrib/lite/kernels/sub.cc | 3 +- .../resolve_tensorflow_merge.cc | 2 +- tensorflow/contrib/lite/toco/model.h | 6 +- .../contrib/losses/python/losses/loss_ops.py | 9 +- .../python/metric_learning/metric_loss_ops.py | 4 +- .../contrib/makefile/download_dependencies.sh | 4 +- .../meta_graph_transform.py | 2 +- .../contrib/metrics/python/ops/metric_ops.py | 15 +- .../contrib/nn/python/ops/sampling_ops.py | 2 +- tensorflow/contrib/opt/BUILD | 17 + tensorflow/contrib/opt/__init__.py | 2 + .../contrib/opt/python/training/adamax.py | 191 ++++++++++ .../opt/python/training/adamax_test.py | 348 ++++++++++++++++++ .../training/moving_average_optimizer_test.py | 4 +- .../optimizer_v2/checkpointable_utils_test.py | 2 +- .../contrib/optimizer_v2/optimizer_v2.py | 2 +- .../quantize/python/fold_batch_norms.py | 2 +- .../kernel_tests/attention_wrapper_test.py | 112 +++++- .../seq2seq/python/ops/attention_wrapper.py | 38 +- .../python/kernel_tests/mel_ops_test.py | 13 + .../contrib/signal/python/ops/mel_ops.py | 16 +- tensorflow/contrib/slim/README.md | 8 +- .../contrib/slim/python/slim/learning.py | 5 +- .../slim/python/slim/nets/resnet_v1.py | 2 +- .../slim/python/slim/nets/resnet_v2.py | 2 +- .../tensor_forest/client/random_forest.py | 2 +- .../core/ops/hard_routing_function_op.cc | 2 +- .../stochastic_hard_routing_function_op.cc | 2 +- 
.../stochastic_hard_routing_gradient_op.cc | 2 +- .../tensor_forest/kernels/tree_utils.cc | 4 +- .../tensor_forest/kernels/tree_utils.h | 2 +- .../kernels/v4/decision-tree-resource.h | 2 +- .../kernels/v4/decision_node_evaluator.h | 2 +- .../contrib/tensor_forest/ops/model_ops.cc | 2 +- .../contrib/tensor_forest/ops/stats_ops.cc | 6 +- .../tensor_forest/python/tensor_forest.py | 2 +- tensorflow/contrib/tensorrt/BUILD | 21 +- tensorflow/contrib/tensorrt/README.md | 60 +-- .../resources/trt_resource_manager.cc | 6 + .../tensorrt/resources/trt_resource_manager.h | 6 +- .../tensorrt/test/tf_trt_integration_test.py | 156 ++++++++ .../python/timeseries/math_utils.py | 2 +- .../training/python/training/resample.py | 2 +- .../training/python/training/sampling_ops.py | 6 +- .../training/sequence_queueing_state_saver.py | 4 +- tensorflow/core/BUILD | 16 + .../base_api/api_def_ApplyAdaMax.pbtxt | 78 ++++ .../base_api/api_def_BroadcastTo.pbtxt | 41 +++ .../base_api/api_def_ImageSummary.pbtxt | 2 +- .../api_def_ResourceApplyAdaMax.pbtxt | 72 ++++ .../base_api/api_def_StringStrip.pbtxt | 16 + .../python_api/api_def_ApplyAdaMax.pbtxt | 4 + .../python_api/api_def_BroadcastTo.pbtxt | 4 + .../api_def_ResourceApplyAdaMax.pbtxt | 4 + .../core/common_runtime/bfc_allocator.h | 2 +- .../core/common_runtime/mkl_cpu_allocator.h | 4 + tensorflow/core/framework/collective.h | 2 +- tensorflow/core/framework/numeric_types.h | 4 +- tensorflow/core/graph/mkl_tfconversion_pass.h | 4 + .../grappler/clusters/single_machine_test.cc | 9 + tensorflow/core/grappler/optimizers/BUILD | 1 + .../optimizers/custom_graph_optimizer.h | 4 +- .../custom_graph_optimizer_registry_test.cc | 5 +- .../optimizers/meta_optimizer_test.cc | 5 +- tensorflow/core/kernels/BUILD | 50 +-- .../batching_util/shared_batch_scheduler.h | 6 +- tensorflow/core/kernels/broadcast_to_op.cc | 91 +++++ tensorflow/core/kernels/broadcast_to_op.h | 220 +++++++++++ .../core/kernels/broadcast_to_op_gpu.cu.cc | 34 ++ 
tensorflow/core/kernels/conv_ops_gpu.h | 5 +- tensorflow/core/kernels/ctc_decoder_ops.cc | 34 +- .../core/kernels/mkl_input_conversion_op.cc | 35 +- tensorflow/core/kernels/mkl_relu_op.cc | 8 +- tensorflow/core/kernels/roll_op.cc | 7 +- .../core/kernels/segment_reduction_ops.h | 8 + tensorflow/core/kernels/string_strip_op.cc | 53 +++ tensorflow/core/kernels/training_ops.cc | 150 ++++++++ tensorflow/core/kernels/training_ops.h | 12 + .../core/kernels/training_ops_gpu.cu.cc | 30 ++ tensorflow/core/lib/bfloat16/bfloat16.h | 4 +- tensorflow/core/lib/gtl/manual_constructor.h | 2 +- tensorflow/core/lib/strings/stringprintf.cc | 10 +- .../core/lib/strings/stringprintf_test.cc | 4 +- tensorflow/core/ops/array_ops.cc | 52 +++ tensorflow/core/ops/dataset_ops.cc | 140 ++++++- tensorflow/core/ops/manip_ops.cc | 13 +- tensorflow/core/ops/nn_ops.cc | 6 + tensorflow/core/ops/random_ops.cc | 7 +- tensorflow/core/ops/string_ops.cc | 5 + tensorflow/core/ops/training_ops.cc | 51 +++ tensorflow/core/platform/default/logging.cc | 1 + .../platform/hadoop/hadoop_file_system.cc | 2 + .../core/protobuf/rewriter_config.proto | 11 + tensorflow/core/public/version.h | 4 +- tensorflow/core/util/memmapped_file_system.cc | 2 +- tensorflow/core/util/memmapped_file_system.h | 4 +- tensorflow/core/util/mkl_util.h | 4 + .../python/contrib.bayesflow.monte_carlo.md | 28 +- .../docs_src/community/documentation.md | 52 +-- tensorflow/docs_src/deploy/s3.md | 81 +++- .../docs_src/extend/language_bindings.md | 9 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 24 +- tensorflow/docs_src/install/install_linux.md | 58 ++- tensorflow/docs_src/install/install_mac.md | 10 +- .../docs_src/install/install_sources.md | 9 +- tensorflow/docs_src/mobile/android_build.md | 3 +- .../docs_src/performance/quantization.md | 2 +- .../docs_src/programmers_guide/debugger.md | 2 +- .../docs_src/programmers_guide/graphs.md | 6 +- 
.../docs_src/programmers_guide/saved_model.md | 50 +-- .../docs_src/programmers_guide/using_tpu.md | 4 +- .../docs_src/tutorials/audio_recognition.md | 2 +- tensorflow/docs_src/tutorials/layers.md | 17 +- .../tutorials/word2vec/word2vec_basic.py | 2 +- tensorflow/go/op/wrappers.go | 2 +- .../org/tensorflow/examples/LabelImage.java | 2 + tensorflow/python/BUILD | 19 +- tensorflow/python/debug/cli/readline_ui.py | 8 +- .../python/debug/wrappers/grpc_wrapper.py | 11 +- tensorflow/python/debug/wrappers/hooks.py | 17 +- tensorflow/python/estimator/canned/head.py | 9 +- tensorflow/python/estimator/estimator.py | 5 +- tensorflow/python/estimator/run_config.py | 33 +- .../python/estimator/run_config_test.py | 24 +- .../python/feature_column/feature_column.py | 1 - tensorflow/python/framework/dtypes.py | 14 +- .../python/framework/graph_util_impl.py | 2 +- .../python/framework/graph_util_test.py | 2 +- tensorflow/python/framework/load_library.py | 2 +- tensorflow/python/framework/python_op_gen.i | 8 +- tensorflow/python/framework/test_util.py | 2 + .../python/grappler/layout_optimizer_test.py | 10 +- .../python/keras/_impl/keras/backend.py | 4 +- .../keras/_impl/keras/layers/normalization.py | 4 +- tensorflow/python/kernel_tests/BUILD | 26 ++ .../kernel_tests/broadcast_to_ops_test.py | 85 +++++ .../kernel_tests/confusion_matrix_test.py | 7 +- .../python/kernel_tests/constant_op_test.py | 5 + .../kernel_tests/conv3d_transpose_test.py | 12 + .../python/kernel_tests/manip_ops_test.py | 55 ++- .../python/kernel_tests/norm_op_test.py | 16 +- .../python/kernel_tests/py_func_test.py | 32 ++ .../random/multinomial_op_test.py | 2 +- .../kernel_tests/random/random_ops_test.py | 11 + .../kernel_tests/string_strip_op_test.py | 56 +++ tensorflow/python/lib/core/py_func.cc | 3 + tensorflow/python/ops/array_ops.py | 15 +- .../python/ops/distributions/categorical.py | 2 +- tensorflow/python/ops/embedding_ops.py | 26 +- tensorflow/python/ops/histogram_ops.py | 1 - 
tensorflow/python/ops/image_ops_impl.py | 74 ++-- tensorflow/python/ops/init_ops.py | 18 +- tensorflow/python/ops/linalg_ops.py | 77 ++-- tensorflow/python/ops/linalg_ops_impl.py | 73 ++++ tensorflow/python/ops/losses/losses_impl.py | 23 +- tensorflow/python/ops/math_ops.py | 38 +- tensorflow/python/ops/nn.py | 1 + tensorflow/python/ops/nn_impl.py | 11 +- tensorflow/python/ops/nn_ops.py | 8 +- tensorflow/python/ops/rnn_cell_impl.py | 4 +- .../python/profiler/tfprof_logger_test.py | 2 +- tensorflow/python/tools/saved_model_cli.py | 3 +- tensorflow/python/training/saver_test.py | 2 +- tensorflow/python/util/compat.py | 7 +- tensorflow/stream_executor/cuda/cuda_dnn.cc | 7 +- tensorflow/stream_executor/cuda/cuda_dnn.h | 2 +- .../stream_executor/cuda/cuda_driver.cc | 14 +- .../stream_executor/cuda/cuda_gpu_executor.cc | 2 +- tensorflow/stream_executor/dnn.h | 20 +- tensorflow/stream_executor/platform/port.h | 6 - tensorflow/tensorflow.bzl | 3 +- .../tensorflow.estimator.-run-config.pbtxt | 6 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + tensorflow/tools/ci_build/builds/pip.sh | 4 + .../tools/ci_build/builds/test_user_ops.sh | 41 ++- .../tools/ci_build/linux/cpu/run_mkl.sh | 5 +- .../ci_build/windows/gpu/cmake/run_py.bat | 6 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- .../tools/docker/Dockerfile.devel-cpu-mkl | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- tensorflow/tools/git/gen_git_source.py | 56 ++- tensorflow/tools/git/gen_git_source.sh | 10 +- .../tools/graph_transforms/transform_graph.cc | 70 +++- tensorflow/tools/pip_package/setup.py | 2 +- tensorflow/workspace.bzl | 9 +- third_party/repo.bzl | 3 +- 281 files changed, 4024 insertions(+), 895 deletions(-) create mode 100644 tensorflow/contrib/cmake/external/mkldnn.cmake create mode 100644 tensorflow/contrib/opt/python/training/adamax.py create mode 100644 tensorflow/contrib/opt/python/training/adamax_test.py create mode 100644 tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py 
create mode 100644 tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BroadcastTo.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceApplyAdaMax.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_StringStrip.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAdaMax.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAdaMax.pbtxt create mode 100644 tensorflow/core/kernels/broadcast_to_op.cc create mode 100644 tensorflow/core/kernels/broadcast_to_op.h create mode 100644 tensorflow/core/kernels/broadcast_to_op_gpu.cu.cc create mode 100644 tensorflow/core/kernels/string_strip_op.cc create mode 100644 tensorflow/python/kernel_tests/broadcast_to_ops_test.py create mode 100644 tensorflow/python/kernel_tests/string_strip_op_test.py create mode 100644 tensorflow/python/ops/linalg_ops_impl.py diff --git a/CODEOWNERS b/CODEOWNERS index 007a304c3e..b9f0313cc6 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -45,7 +45,7 @@ # /tensorflow/contrib/session_bundle/ @nfiedel @sukritiramesh # /tensorflow/contrib/slim/ @sguada @thenbasilmanran # /tensorflow/contrib/stateless/ @girving -# /tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst +# /tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst @yupbank # /tensorflow/contrib/testing/ @dandelionmane # /tensorflow/contrib/timeseries/ @allenlavoie # /tensorflow/contrib/tpu/ @frankchn @saeta @jhseu diff --git a/README.md b/README.md index 29418dc2e9..e1a50c87e2 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ data flow graphs. The graph nodes represent mathematical operations, while the graph edges represent the multidimensional data arrays (tensors) that flow between them. 
This flexible architecture enables you to deploy computation to one or more CPUs or GPUs in a desktop, server, or mobile device without rewriting -code. TensorFlow also includes TensorBoard, a data visualization toolkit. +code. TensorFlow also includes [TensorBoard](https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard), a data visualization toolkit. TensorFlow was originally developed by researchers and engineers working on the Google Brain team within Google's Machine Intelligence Research diff --git a/RELEASE.md b/RELEASE.md index e845953174..2717c75740 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,61 @@ +# Release 1.8.0 + +## Major Features And Improvements +* Can now pass `tf.contrib.distribute.MirroredStrategy()` to `tf.estimator.RunConfig()` to run an Estimator model on multiple GPUs on one machine. +* Add `tf.contrib.data.prefetch_to_device()`, which supports prefetching to GPU memory. +* Added Gradient Boosted Trees as pre-made Estimators: BoostedTreesClassifier, BoostedTreesRegressor. +* Add 3rd generation pipeline config for Cloud TPUs which improves performance and usability. +* `tf.contrib.bayesflow` is moving out to its own repo. +* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication. + +## Bug Fixes and Other Changes +* `tf.data`: + * Add `tf.contrib.data.prefetch_to_device`, which enables prefetching dataset elements to GPU memory. + * Add `tf.contrib.data.AUTOTUNE`, which allows the tf.data runtime to automatically tune the prefetch buffer sizes based on your system and environment. + * Add `tf.contrib.data.make_csv_dataset` for building datasets of CSV files. +* Eager Execution: + * With eager execution Datasets can now be used as standard python iterators (`for batch in dataset:`). Both `Dataset.__iter__()` and `Dataset.make_one_shot_iterator()` can now be used to create iterators when eager execution is enabled.
+ * Automatic device placement has been enabled (i.e., use a GPU if available automatically, without requiring an explicit `with tf.device("/gpu:0")`) (Fixes #14133) + * `tf.GradientTape` has moved out of contrib. +* `tf.keras`: + * Added the fashion mnist dataset. + * New data preprocessing functions: `image/random_brightness`, `sequence/TimeseriesGenerator`, and `text/hashing_trick`. +* Accelerated Linear Algebra (XLA): + * Select and scatter in reference util and evaluator now use lexicographical order to break ties. +* TensorFlow Debugger (tfdbg) CLI: + * During tensor-filter operations, allow exclusion of nodes by regular expressions. + * Fix spurious background colors in some text terminals. +* `tf.contrib`: + * Add meta-distribution BatchReshape which reshapes batch dimensions. + * `tf.contrib.layers.recompute_grad` works for explicit gradient checkpointing on TPU. + * Add `tf.contrib.framework.argsort`. + * Allow `DNNBoostedTreeCombinedEstimator` to work with core versions of feature columns and losses. + * Add non-linear image warping ops: `tf.contrib.image.sparse_image_warp`, `tf.contrib.image.dense_image_warp`, and `tf.contrib.image.interpolate_spline`. + * Fix bug in `tf.contrib.opt.MultitaskOptimizerWrapper` where types of tensors were mismatched. +* Other: + * Low-level graph construction now calls the TensorFlow C API. This change should be invisible to most users, but can be disabled by setting the environment variable `TF_C_API_GRAPH_CONSTRUCTION=0` in this release. Future releases will remove the ability to disable this change. Please [file a bug](https://github.com/tensorflow/tensorflow/issues/new) if you find yourself using this escape hatch. + * Add description of shapes and a pointer to tutorial notebook in `tf.distributions.Distribution`. + * Update scatter operations: + * Add `tf.scatter_min` and `tf.scatter_max` + * Extend scatter operations to work with a scalar update parameter.
+ * Move cuDNN RNN ops to core for use in TensorFlow codebase only. + * Add `float64` support for `Conv2d`, `Conv2dBackpropInput`, and `Conv2dBackpropFilter`. + * Add `float64` support for `AvgPool`/`AvgPoolGrad`. + * Make graph name scope thread local so that they work correctly in multi-threaded environments. + * Update nsync synchronization library to avoid slow primitives on Linux. + * Removed need to put nsync/public on C include path when building custom ops. + * Add `tf.image.psnr`, `tf.image.ssim`, `tf.image.ssim_multiscale`, `tf.image.image_gradients`, `tf.image.sobel_edges`. + * Add links to https://js.tensorflow.org. + * Fix non-uniformity of orthogonal matrices. + * Fix bug where multi-image Estimator eval summaries were not displayed correctly. + +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +4d55397500, Aghasy, Alan Du, Alan Lee, Alan Yee, Alex Wiltschko, Animesh Karnewar, Ankit Gupta, Anton Matosov, Aris L, Ben Barsdell, Brent Yi, Brett Koonce, Carl Thomé, cbockman, Chikanaga Tomoyuki, Chris Tava, CéDric Deltheil, Dahan Gong, Dalmo Cirne, Daniel Erenrich, David Norman, DavidNorman, Edd Wilder-James, Fanjin Zeng, Felix Abecassis, fo40225, George Sterpu, Giovanni Terlingen, Gor Baghdasaryan, Guillaume Klein, Hanchen Li, Ilya Polenov, Jakub Kolodziejczyk, Jason Sadler, Jayaram Bobba, Jerry Liu, jinghuangintel, Jiongyan Zhang (张炯衍), Joel Shor, Jong Wook Kim, Julian Eisenschlos, Karl Lessard, Krish Ravindranath, Loo Rong Jie, Lukas Geiger, Luke Iwanski, Mahmoud Abuzaina, ManHyuk, Marvin Richter, Maximilian Mitchell, Mohammad Ashraf Bhuiyan, msofka, Mustafa Kasap, Nathan Burnham, Nathan Luehr, Naveen Marri, ngc92, nio1814, Oleg Zabluda, Ou Changkun, Panos Ipeirotis, Paul Van Eck, Peter Lee, Piotr Czapla, qjivy, Rholais Lii, Rodrigo Formigone, Russell Klopfer, ryantimjohn, Sang Han, SebastiáN RamíRez, shengfuintel, Siby Jose Plathottam, Silver Chan, Stanislaw Antol, Taehoon Lee, Tarang Chugh, 
Ted Chang, Thomas Bastiani, Xian Xu, Xiaoming (Jason) Cui, Yan Facai (颜发才), yaox12, Yashal Shakti Kanungo, Yong Tang, Yuan (Terry) Tang, Yuxin Wu, Ziyue(Louis) Lu + + # Release 1.7.0 ## Major Features And Improvements diff --git a/WORKSPACE b/WORKSPACE index 11c5cdb207..4ddfb9a383 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -2,11 +2,11 @@ workspace(name = "org_tensorflow") http_archive( name = "io_bazel_rules_closure", - sha256 = "6691c58a2cd30a86776dd9bb34898b041e37136f2dc7e24cadaeaf599c95c657", - strip_prefix = "rules_closure-08039ba8ca59f64248bb3b6ae016460fe9c9914f", + sha256 = "a38539c5b5c358548e75b44141b4ab637bba7c4dc02b46b1f62a96d6433f56ae", + strip_prefix = "rules_closure-dbb96841cc0a5fb2664c37822803b06dab20c7d1", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/08039ba8ca59f64248bb3b6ae016460fe9c9914f.tar.gz", - "https://github.com/bazelbuild/rules_closure/archive/08039ba8ca59f64248bb3b6ae016460fe9c9914f.tar.gz", # 2018-01-16 + "https://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz", + "https://github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz", # 2018-04-13 ], ) diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index fe85f8ee0e..c859434745 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -72,7 +72,7 @@ limitations under the License. #ifdef SWIG #define TF_CAPI_EXPORT #else -#if defined(COMPILER_MSVC) +#if defined(_WIN32) #ifdef TF_COMPILE_LIBRARY #define TF_CAPI_EXPORT __declspec(dllexport) #else @@ -80,7 +80,7 @@ limitations under the License. 
#endif // TF_COMPILE_LIBRARY #else #define TF_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // COMPILER_MSVC +#endif // _WIN32 #endif // SWIG #ifdef __cplusplus diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index 9678ee926f..d3916bc167 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -184,6 +184,7 @@ library { return std::move(functions[0]); } +#if not defined(PLATFORM_WINDOWS) // On success, returns a set of TF_Function instances encoding a dataset // node stack that reads a Imagenet TFRecordFile dataset from `file_path`, and // sets `dataset_name` to the created dataset name. The returned functions must @@ -7076,7 +7077,9 @@ library { return CreateFunctionsFromTextProto(func_def, &mutate_proto_func, status); #endif } +#endif +#if not defined(PLATFORM_WINDOWS) // On success, returns a set of TF_Function instances encoding a dataset // node stack that reads an MNIST file dataset from `file_path`, and // sets `dataset_name` to the created dataset name. The returned functions must @@ -8221,6 +8224,7 @@ library { return CreateFunctionsFromTextProto(func_def, &mutate_proto_func, status); #endif } +#endif // Adds the input functions to `graph`. On success, returns the created // IteratorGetNext node. @@ -8314,6 +8318,13 @@ TF_Operation* TF_MakeFakeIteratorGetNextWithDatasets(TF_Graph* graph, TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets( TF_Graph* graph, const char* file_path, int batch_size, unsigned char is_mnist, TF_Status* status) { +#if defined(PLATFORM_WINDOWS) + // TODO(ashankar): get these functions working on Windows. 
+ status->status = tensorflow::errors::Unimplemented( + "TF_MakeFileBasedIteratorGetNextWithDatasets in the experimental C API " + "is not implemented for Windows"); + return nullptr; +#else tensorflow::Status s; std::string dataset_name; @@ -8355,4 +8366,5 @@ TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets( << graph->graph.ToGraphDefDebug().DebugString(); return getnext_node; +#endif } diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index 666342974e..88cb173cd2 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -35,7 +35,7 @@ limitations under the License. #ifdef SWIG #define TF_CAPI_EXPORT #else -#if defined(COMPILER_MSVC) +#if defined(_WIN32) #ifdef TF_COMPILE_LIBRARY #define TF_CAPI_EXPORT __declspec(dllexport) #else @@ -43,7 +43,7 @@ limitations under the License. #endif // TF_COMPILE_LIBRARY #else #define TF_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // COMPILER_MSVC +#endif // _WIN32 #endif // SWIG #ifdef __cplusplus diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 15ac0f376c..ba77f3cd07 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -30,7 +30,7 @@ limitations under the License. #ifdef SWIG #define TF_CAPI_EXPORT #else -#if defined(COMPILER_MSVC) +#if defined(_WIN32) #ifdef TF_COMPILE_LIBRARY #define TF_CAPI_EXPORT __declspec(dllexport) #else @@ -38,7 +38,7 @@ limitations under the License. 
#endif // TF_COMPILE_LIBRARY #else #define TF_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // COMPILER_MSVC +#endif // _WIN32 #endif // SWIG #ifdef __cplusplus diff --git a/tensorflow/compiler/aot/runtime.cc b/tensorflow/compiler/aot/runtime.cc index 5772776666..5e74079fc1 100644 --- a/tensorflow/compiler/aot/runtime.cc +++ b/tensorflow/compiler/aot/runtime.cc @@ -31,7 +31,7 @@ namespace { inline void* aligned_malloc(size_t size, int minimum_alignment) { #if defined(__ANDROID__) || defined(OS_ANDROID) || defined(OS_CYGWIN) return memalign(minimum_alignment, size); -#elif defined(COMPILER_MSVC) +#elif defined(_WIN32) return _aligned_malloc(size, minimum_alignment); #else // !__ANDROID__ && !OS_ANDROID && !OS_CYGWIN void* ptr = nullptr; @@ -48,7 +48,7 @@ inline void* aligned_malloc(size_t size, int minimum_alignment) { } inline void aligned_free(void* aligned_memory) { -#if defined(COMPILER_MSVC) +#if defined(_WIN32) _aligned_free(aligned_memory); #else free(aligned_memory); diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index d1d7379c0a..1e4dd32916 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -360,11 +360,13 @@ class BinaryOpsTest(XLATestCase): np.array([2, -1], dtype=dtype), expected=np.array([[[[3, 1], [5, 3]]]], dtype=dtype)) - self._testBinary( - math_ops.add, - np.array([0xffffffff, 0xfffffffff, 1, 1], dtype=np.int64), - np.array([1, 1, 0xffffffff, 0xfffffffff], dtype=np.int64), - expected=np.array([1 << 32, 1 << 36, 1 << 32, 1 << 36], dtype=np.int64)) + if np.int64 in self.numeric_types: + self._testBinary( + math_ops.add, + np.array([0xffffffff, 0xfffffffff, 1, 1], dtype=np.int64), + np.array([1, 1, 0xffffffff, 0xfffffffff], dtype=np.int64), + expected=np.array([1 << 32, 1 << 36, 1 << 32, 1 << 36], + dtype=np.int64)) def testComplexOps(self): for dtype in self.complex_types: diff --git 
a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py index 6fe7b242e4..c073c02040 100644 --- a/tensorflow/compiler/xla/python/xla_client_test.py +++ b/tensorflow/compiler/xla/python/xla_client_test.py @@ -1160,7 +1160,6 @@ class EmbeddedComputationsTest(LocalComputationTest): self._ExecuteAndCompareClose( c, expected=np.sum(input_array, axis=tuple(dims))) - _ReduceAndTest(0) _ReduceAndTest(0) _ReduceAndTest(0, 1) _ReduceAndTest(0, 2) diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc index 1790c50d4d..c4c56c5692 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc @@ -97,9 +97,9 @@ bool ShouldIncludeWinogradNonfusedAlgo(const Shape& input_shape, const ConvolutionDimensionNumbers& dnums, se::StreamExecutor* stream_exec) { // Skip this check for cudnn7 and newer. 
- se::port::StatusOr> version = + auto version = stream_exec->AsDnn()->GetVersion(); - if (version.ok() && std::get<0>(version.ValueOrDie()) >= 7) { + if (version.ok() && version.ValueOrDie().major_version() >= 7) { return true; } diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 7b994a4c17..c4031dfee5 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -50,6 +50,13 @@ using TypesF16F32 = ::testing::Types; using TypesF16F32F64 = ::testing::Types; using TypesF16F32F64CF64 = ::testing::Types; +#elif !defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16) && \ + defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT64) && \ + defined(XLA_BACKEND_DOES_NOT_SUPPORT_COMPLEX) +using TypesF16F32 = ::testing::Types; +using TypesF16F32F64 = ::testing::Types; +using TypesF16F32F64CF64 = + ::testing::Types; #else #error "Situation not handled yet" #endif diff --git a/tensorflow/contrib/autograph/converters/call_trees.py b/tensorflow/contrib/autograph/converters/call_trees.py index 2e5590b46c..554f0471d4 100644 --- a/tensorflow/contrib/autograph/converters/call_trees.py +++ b/tensorflow/contrib/autograph/converters/call_trees.py @@ -146,7 +146,7 @@ class CallTreeTransformer(transformer.Base): # Inspect the target function decorators. If any include a @convert # or @graph_ready annotation, then they must be called as they are. # TODO(mdan): This may be quite heavy. - # To parse and re-analize each function for every call site could be quite + # To parse and re-analyze each function for every call site could be quite # wasteful. Maybe we could cache the parsed AST? 
try: target_node, _ = parser.parse_entity(target_entity) diff --git a/tensorflow/contrib/autograph/converters/call_trees_test.py b/tensorflow/contrib/autograph/converters/call_trees_test.py index c666dcb73b..303dd54a4e 100644 --- a/tensorflow/contrib/autograph/converters/call_trees_test.py +++ b/tensorflow/contrib/autograph/converters/call_trees_test.py @@ -34,7 +34,7 @@ class CallTreesTest(converter_test_base.TestCase): def test_basic(self): def test_fn_1(_): - raise ValueError('This should not be called in the compiled verison.') + raise ValueError('This should not be called in the compiled version.') def renamed_test_fn_1(a): return a + 1 diff --git a/tensorflow/contrib/autograph/converters/decorators_test.py b/tensorflow/contrib/autograph/converters/decorators_test.py index e67ab1cd6a..9c01f68912 100644 --- a/tensorflow/contrib/autograph/converters/decorators_test.py +++ b/tensorflow/contrib/autograph/converters/decorators_test.py @@ -28,7 +28,7 @@ from tensorflow.python.platform import test # The Python parser only briefly captures decorators into the AST. # The interpreter desugars them on load, and the decorated function loses any -# trace of the decorator (which is notmally what you would expect, since +# trace of the decorator (which is normally what you would expect, since # they are meant to be transparent). # However, decorators are still visible when you analyze the function # from inside a decorator, before it was applied - as is the case diff --git a/tensorflow/contrib/autograph/impl/api.py b/tensorflow/contrib/autograph/impl/api.py index d874ef15c9..24f87b2c14 100644 --- a/tensorflow/contrib/autograph/impl/api.py +++ b/tensorflow/contrib/autograph/impl/api.py @@ -49,7 +49,7 @@ def convert(recursive=False, verbose=False, arg_types=None): function is called. This means the parameter values are known at compilation. 
Args: - recursive: Whether to recusrively convert any functions that the decorator + recursive: Whether to recursively convert any functions that the decorator function may call. verbose: Whether to output the compiled code in the logs. arg_types: See to_graph. @@ -215,7 +215,7 @@ def to_graph(e, Args: e: A Python entity. - recursive: Whether to recusrively convert any functions that the decorator + recursive: Whether to recursively convert any functions that the decorator function may call. verbose: Whether to output the compiled code in the logs. arg_values: A dict containing value hints for symbols like function diff --git a/tensorflow/contrib/autograph/impl/conversion.py b/tensorflow/contrib/autograph/impl/conversion.py index e7230a5f45..55a30dc127 100644 --- a/tensorflow/contrib/autograph/impl/conversion.py +++ b/tensorflow/contrib/autograph/impl/conversion.py @@ -61,7 +61,7 @@ class ConversionMap(object): This object is mutable, and is updated as functions are converted. Attributes: - recursive: Whether to recusrively convert any functions that the decorator + recursive: Whether to recursively convert any functions that the decorator function may call. nocompile_decorators: tuple of decorator functions that toggle compilation off. diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/activity.py b/tensorflow/contrib/autograph/pyct/static_analysis/activity.py index b81f5c7f87..2c14c2c8c2 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/activity.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/activity.py @@ -162,11 +162,11 @@ class Scope(object): self.parent.mark_returned(name) -class ActivityAnalizer(transformer.Base): +class ActivityAnalyzer(transformer.Base): """Annotates nodes with local scope information. 
See Scope.""" def __init__(self, context, parent_scope): - super(ActivityAnalizer, self).__init__(context) + super(ActivityAnalyzer, self).__init__(context) self.scope = Scope(parent_scope) self._in_return_statement = False @@ -356,4 +356,4 @@ class ActivityAnalizer(transformer.Base): def resolve(node, context, parent_scope=None): - return ActivityAnalizer(context, parent_scope).visit(node) + return ActivityAnalyzer(context, parent_scope).visit(node) diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py index d1c4a94b14..ef79a295bf 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py @@ -108,7 +108,7 @@ class ScopeTest(test.TestCase): self.assertFalse(QN('a') in child.referenced) -class ActivityAnalizerTest(test.TestCase): +class ActivityAnalyzerTest(test.TestCase): def _parse_and_analyze(self, test_fn): node, source = parser.parse_entity(test_fn) diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/annos.py b/tensorflow/contrib/autograph/pyct/static_analysis/annos.py index d6d9f7e1a6..b929b35b79 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/annos.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/annos.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Annotations used by the static analizer.""" +"""Annotations used by the static analyzer.""" from __future__ import absolute_import from __future__ import division @@ -28,15 +28,15 @@ class NoValue(Enum): class NodeAnno(NoValue): - """Additionnal annotations used by the static analyzer. + """Additional annotations used by the static analyzer. These are in addition to the basic annotations declared in anno.py. 
""" # Symbols # These flags are boolean. - IS_LOCAL = 'Symbol is local to the function scope being analized.' - IS_PARAM = 'Symbol is a parameter to the function being analized.' + IS_LOCAL = 'Symbol is local to the function scope being analyzed.' + IS_PARAM = 'Symbol is a parameter to the function being analyzed.' IS_MODIFIED_SINCE_ENTRY = ( 'Symbol has been explicitly replaced in the current function scope.') diff --git a/tensorflow/contrib/autograph/utils/builtins.py b/tensorflow/contrib/autograph/utils/builtins.py index dfc3c86a3d..211e8eaee9 100644 --- a/tensorflow/contrib/autograph/utils/builtins.py +++ b/tensorflow/contrib/autograph/utils/builtins.py @@ -77,7 +77,7 @@ def is_tf_print_compatible(value): def dynamic_print(*values): - """Implementartion of print using dynamic dispatch. + """Implementation of print using dynamic dispatch. The function attempts to use tf.Print if all the values are compatible. Otherwise, it will fall back to py_func. diff --git a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py index d193a8459d..032b859d46 100644 --- a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py @@ -44,15 +44,13 @@ def expectation_importance_sampler(f, n=None, seed=None, name='expectation_importance_sampler'): - r"""Monte Carlo estimate of `\\(E_p[f(Z)] = E_q[f(Z) p(Z) / q(Z)]\\)`. + r"""Monte Carlo estimate of \\(E_p[f(Z)] = E_q[f(Z) p(Z) / q(Z)]\\). - With `\\(p(z) := exp^{log_p(z)}\\)`, this `Op` returns + With \\(p(z) := exp^{log_p(z)}\\), this `Op` returns - ``` \\(n^{-1} sum_{i=1}^n [ f(z_i) p(z_i) / q(z_i) ], z_i ~ q,\\) \\(\approx E_q[ f(Z) p(Z) / q(Z) ]\\) \\(= E_p[f(Z)]\\) - ``` This integral is done in log-space with max-subtraction to better handle the often extreme values that `f(z) p(z) / q(z)` can take on. 
@@ -121,14 +119,12 @@ def expectation_importance_sampler_logspace( name='expectation_importance_sampler_logspace'): r"""Importance sampling with a positive function, in log-space. - With `\\(p(z) := exp^{log_p(z)}\\)`, and `\\(f(z) = exp{log_f(z)}\\)`, + With \\(p(z) := exp^{log_p(z)}\\), and \\(f(z) = exp{log_f(z)}\\), this `Op` returns - ``` \\(Log[ n^{-1} sum_{i=1}^n [ f(z_i) p(z_i) / q(z_i) ] ], z_i ~ q,\\) \\(\approx Log[ E_q[ f(Z) p(Z) / q(Z) ] ]\\) \\(= Log[E_p[f(Z)]]\\) - ``` This integral is done in log-space with max-subtraction to better handle the often extreme values that `f(z) p(z) / q(z)` can take on. @@ -196,13 +192,11 @@ def _logspace_mean(log_values): def expectation(f, samples, log_prob=None, use_reparametrization=True, axis=0, keep_dims=False, name=None): - """Computes the Monte-Carlo approximation of `\\(E_p[f(X)]\\)`. + """Computes the Monte-Carlo approximation of \\(E_p[f(X)]\\). This function computes the Monte-Carlo approximation of an expectation, i.e., - ```none \\(E_p[f(X)] \approx= m^{-1} sum_i^m f(x_j), x_j\ ~iid\ p(X)\\) - ``` where: @@ -216,8 +210,8 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, parameterless distribution (e.g., `Normal(Y; m, s) <=> Y = sX + m, X ~ Normal(0,1)`), we can swap gradient and expectation, i.e., - `grad[ Avg{ \\(s_i : i=1...n\\) } ] = Avg{ grad[\\(s_i\\)] : i=1...n }` where - `S_n = Avg{\\(s_i\\)}` and `\\(s_i = f(x_i), x_i ~ p\\)`. + grad[ Avg{ \\(s_i : i=1...n\\) } ] = Avg{ grad[\\(s_i\\)] : i=1...n } where + S_n = Avg{\\(s_i\\)} and \\(s_i = f(x_i), x_i ~ p\\). However, if p is not reparameterized, TensorFlow's gradient will be incorrect since the chain-rule stops at samples of non-reparameterized distributions. @@ -296,7 +290,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, Args: f: Python callable which can return `f(samples)`. samples: `Tensor` of samples used to form the Monte-Carlo approximation of - `\\(E_p[f(X)]\\)`.
A batch of samples should be indexed by `axis` + \\(E_p[f(X)]\\). A batch of samples should be indexed by `axis` dimensions. log_prob: Python callable which can return `log_prob(samples)`. Must correspond to the natural-logarithm of the pdf/pmf of each sample. Only @@ -317,7 +311,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, Returns: approx_expectation: `Tensor` corresponding to the Monte-Carlo approximation - of `\\(E_p[f(X)]\\)`. + of \\(E_p[f(X)]\\). Raises: ValueError: if `f` is not a Python `callable`. @@ -329,7 +323,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, if not callable(f): raise ValueError('`f` must be a callable function.') if use_reparametrization: - return math_ops.reduce_mean(f(samples), axis=axis, keep_dims=keep_dims) + return math_ops.reduce_mean(f(samples), axis=axis, keepdims=keep_dims) else: if not callable(log_prob): raise ValueError('`log_prob` must be a callable function.') @@ -349,7 +343,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, # "Is there a floating point value of x, for which x-x == 0 is false?" # http://stackoverflow.com/q/2686644 fx += stop(fx) * (logpx - stop(logpx)) # Add zeros_like(logpx). 
- return math_ops.reduce_mean(fx, axis=axis, keep_dims=keep_dims) + return math_ops.reduce_mean(fx, axis=axis, keepdims=keep_dims) def _sample_mean(values): diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index 17dcb49f47..f9c22283b7 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -45,7 +45,7 @@ from tensorflow.python.platform import googletest def _squared_loss(label, unused_weights, predictions): """Unweighted loss implementation.""" loss = math_ops.reduce_sum( - math_ops.square(predictions - label), 1, keep_dims=True) + math_ops.square(predictions - label), 1, keepdims=True) return loss diff --git a/tensorflow/contrib/checkpoint/python/split_dependency_test.py b/tensorflow/contrib/checkpoint/python/split_dependency_test.py index cb964c80e9..f1d9d19b04 100644 --- a/tensorflow/contrib/checkpoint/python/split_dependency_test.py +++ b/tensorflow/contrib/checkpoint/python/split_dependency_test.py @@ -73,7 +73,7 @@ class OnlyOneDep(checkpointable.Checkpointable): class SplitTests(test.TestCase): - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def testSaveRestoreSplitDep(self): save_checkpoint = checkpointable_utils.Checkpoint( dep=SaveTensorSlicesAsDeps()) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index bdf3e98635..5f38a8e5c7 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -31,10 +31,14 @@ option(tensorflow_BUILD_PYTHON_TESTS "Build python unit tests " OFF) option(tensorflow_BUILD_MORE_PYTHON_TESTS "Build more python unit tests for contrib packages" OFF) option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF) 
option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON) -option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions") option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON) option(tensorflow_DISABLE_EIGEN_FORCEINLINE "Disable forceinline, to speed up build on windows." OFF) +# SIMD, MKL and MKLDNN options +option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions" OFF) +option(tensorflow_ENABLE_MKL_SUPPORT "Enable Intel MKL support" OFF) +option(tensorflow_ENABLE_MKLDNN_SUPPORT "Enable Intel MKLDNN support, requires MKL enabled" OFF) + # GPU, CUDA and cuDNN options option(tensorflow_ENABLE_GPU "Enable GPU support" OFF) set(tensorflow_CUDA_VERSION "9.0" CACHE STRING "CUDA version to build against") @@ -124,8 +128,16 @@ endif() add_definitions(-DEIGEN_AVOID_STL_ARRAY) if(WIN32) - add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC) - add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DWIN32_LEAN_AND_MEAN -DNOGDI -DPLATFORM_WINDOWS) + if(CMAKE_SIZEOF_VOID_P EQUAL 8) + # 64 bits + add_definitions(-DWIN64) + elseif(CMAKE_SIZEOF_VOID_P EQUAL 4) + # 32 bits + # temporary fix for #18241 + add_definitions(-DEIGEN_DEFAULT_DENSE_INDEX_TYPE=std::int64_t) + endif() + add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11) + add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN32_LEAN_AND_MEAN -DNOGDI -DPLATFORM_WINDOWS) add_definitions(-DTENSORFLOW_USE_EIGEN_THREADPOOL -DEIGEN_HAS_C99_MATH) add_definitions(-DTF_COMPILE_LIBRARY) add_definitions(/bigobj /nologo /EHsc /GF /MP /Gm-) @@ -162,12 +174,21 @@ endif() # MSVC SIMD instructions if (tensorflow_WIN_CPU_SIMD_OPTIONS) + include(CheckCXXCompilerFlag) + if (tensorflow_ENABLE_MKL_SUPPORT) + add_definitions(-DINTEL_MKL -DEIGEN_USE_VML) + if (NOT tensorflow_ENABLE_MKLDNN_SUPPORT) + add_definitions(-DINTEL_MKL_ML) + endif() + endif() + CHECK_CXX_COMPILER_FLAG("-fopenmp" 
COMPILER_OPT_OPENMP_SUPPORT) + if (COMPILER_OPT_OPENMP_SUPPORT) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + endif() if (WIN32) - CHECK_CXX_COMPILER_FLAG("${tensorflow_WIN_CPU_SIMD_OPTIONS}" COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED) + CHECK_CXX_COMPILER_FLAG(${tensorflow_WIN_CPU_SIMD_OPTIONS} COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED) if(COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${tensorflow_WIN_CPU_SIMD_OPTIONS}") - else() - message(FATAL_ERROR "${tensorflow_WIN_CPU_SIMD_OPTIONS} not supported") endif() endif() endif() @@ -302,6 +323,43 @@ if(HAIKU) list(APPEND tensorflow_EXTERNAL_LIBRARIES network) endif() +if (tensorflow_ENABLE_MKL_SUPPORT) + if (WIN32) + find_path(MKL_HOME_PLATFORM mkl + PATHS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../ + PATH_SUFFIXES windows) + set(MKL_INCLUDE_DIRS ${MKL_HOME_PLATFORM}/mkl/include) + set(MKL_LINK_DIRS + ${MKL_HOME_PLATFORM}/mkl/lib/intel64 + ${MKL_HOME_PLATFORM}/tbb/lib/intel64/vc_mt + ${MKL_HOME_PLATFORM}/compiler/lib/intel64 + ${MKL_HOME_PLATFORM}/mkl/tools/builder/lib) + set(MKL_REDIST_DLL_DIRS + ${MKL_HOME_PLATFORM}/redist/intel64/mkl + ${MKL_HOME_PLATFORM}/redist/intel64/tbb/vc_mt + ${MKL_HOME_PLATFORM}/redist/intel64/compiler) + list(APPEND tensorflow_EXTERNAL_LIBRARIES + mkl_intel_lp64_dll mkl_sequential_dll mkl_core_dll mkl_rt mkl_cdll_intel64) + endif() + if (UNIX) + # Fix me: complete the path on linux + find_path(MKL_HOME_PLATFORM mkl + HINTS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../ + PATH_SUFFIXES linux) + set(MKL_INCLUDE_DIRS ${MKL_HOME_PLATFORM}/mkl/include) + set(MKL_LINK_DIRS) # incompleted + set(MKL_REDIST_SO_DIRS) # incompleted + endif() + include_directories(${MKL_INCLUDE_DIRS}) + link_directories(${MKL_LINK_DIRS}) + if (tensorflow_ENABLE_MKLDNN_SUPPORT) + include(mkldnn) + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES}) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn) + include_directories(${mkldnn_INCLUDE_DIRS}) + endif() +endif 
(tensorflow_ENABLE_MKL_SUPPORT) + if (tensorflow_ENABLE_GPU) if (NOT WIN32) # Default install paths for cuda libraries in Linux diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index fe83bb3204..0b79f718d4 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -128,6 +128,18 @@ Step-by-step Windows build D:\local\cuda\bin ``` + * When building with MKL support after installing [MKL](https://software.intel.com/en-us/mkl) from INTEL, append its bin directories to your PATH environment variable. + + In case TensorFlow fails to find the MKL dll's during initialization, check your PATH environment variable. + It should contain the directory of the MKL dlls. For example: + + ``` + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\mkl + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\compiler + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\tbb\vc_mt + ``` + + * We assume that `cmake` and `git` are installed and in your `%PATH%`. If for example `cmake` is not in your path and it is installed in `C:\Program Files (x86)\CMake\bin\cmake.exe`, you can add this directory @@ -166,7 +178,15 @@ Step-by-step Windows build More? -Dtensorflow_ENABLE_GPU=ON ^ More? -DCUDNN_HOME="D:\...\cudnn" ``` + To build with MKL support add "^" at the end of the last line above following with: + + ``` + More? -Dtensorflow_ENABLE_MKL_SUPPORT=ON ^ + More? -DMKL_HOME="D:\...\compilers_and_libraries" + ``` + To enable SIMD instructions with MSVC, as AVX and SSE, define it as follows: + ``` More? -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX ``` @@ -226,6 +246,7 @@ Step-by-step Windows build ``` ctest -C RelWithDebInfo ``` + * `-Dtensorflow_BUILD_MORE_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This enables python tests on serveral major packages. This option is only valid if this and tensorflow_BUILD_PYTHON_TESTS are both set as `ON`. 
After building the python wheel, you need to install the new wheel before running the tests. @@ -234,6 +255,12 @@ Step-by-step Windows build ctest -C RelWithDebInfo ``` + * `-Dtensorflow_ENABLE_MKL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include MKL support. If MKL is enabled you need to install the [Intel Math Kernel Library](https://software.intel.com/en-us/mkl). + CMake will expect the location of MKL in -DMKL_HOME=path_you_install_mkl. + + * `-Dtensorflow_ENABLE_MKLDNN_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include MKL DNN support. MKL DNN is [Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN)](https://github.com/intel/mkl-dnn). You have to add `-Dtensorflow_ENABLE_MKL_SUPPORT=ON` before including MKL DNN support. + + 4. Invoke MSBuild to build TensorFlow. To build the C++ example program, which will be created as a `.exe` @@ -251,6 +278,7 @@ Step-by-step Windows build D:\...\build> MSBuild /p:Configuration=Release tf_python_build_pip_package.vcxproj ``` + Linux Continuous Integration build ================================== diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake index a235442dc5..cdaa6b73b9 100644 --- a/tensorflow/contrib/cmake/external/gemmlowp.cmake +++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake @@ -14,8 +14,8 @@ # ============================================================================== include (ExternalProject) -set(gemmlowp_URL https://github.com/google/gemmlowp/archive/6a2a90822e8546fc2bfa7044de0faf1c1cb4862f.zip) -set(gemmlowp_HASH SHA256=3447948d219f3270383766bbe08942888c0eb4e0ca6663c0e0548502ec5bb77d) +set(gemmlowp_URL https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip) +set(gemmlowp_HASH SHA256=b87faa7294dfcc5d678f22a59d2c01ca94ea1e2a3b488c38a95a67889ed0a658) set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp) set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp) diff
--git a/tensorflow/contrib/cmake/external/mkldnn.cmake b/tensorflow/contrib/cmake/external/mkldnn.cmake new file mode 100644 index 0000000000..a639fdee36 --- /dev/null +++ b/tensorflow/contrib/cmake/external/mkldnn.cmake @@ -0,0 +1,44 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +include (ExternalProject) + +set(mkldnn_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/include) +set(mkldnn_URL https://github.com/01org/mkl-dnn.git) +set(mkldnn_BUILD ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src) +set(mkldnn_TAG 3063b2e4c943983f6bf5f2fb9a490d4a998cd291) + +if(WIN32) + if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") + set(mkldnn_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/Release/mkldnn.lib) + else() + set(mkldnn_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/mkldnn.lib) + endif() +else() + set(mkldnn_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/mkldnn/src/mkldnn/src/libmkldnn.a) +endif() + +ExternalProject_Add(mkldnn + PREFIX mkldnn + GIT_REPOSITORY ${mkldnn_URL} + GIT_TAG ${mkldnn_TAG} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS ${mkldnn_STATIC_LIBRARIES} + INSTALL_COMMAND "" + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DMKLINC:STRING=${MKL_INCLUDE_DIRS} +) diff --git 
a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake index 6cd66a6599..ad2af01bc0 100644 --- a/tensorflow/contrib/cmake/external/png.cmake +++ b/tensorflow/contrib/cmake/external/png.cmake @@ -15,32 +15,33 @@ include (ExternalProject) set(png_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/png_archive) -set(png_URL https://storage.googleapis.com/libpng-public-archive/libpng-1.2.53.tar.gz) -set(png_HASH SHA256=e05c9056d7f323088fd7824d8c6acc03a4a758c4b4916715924edc5dd3223a72) +set(png_URL https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.6.34.tar.gz) +set(png_HASH SHA256=e45ce5f68b1d80e2cb9a2b601605b374bdf51e1798ef1c2c2bd62131dfcf9eef) set(png_BUILD ${CMAKE_BINARY_DIR}/png/src/png) set(png_INSTALL ${CMAKE_BINARY_DIR}/png/install) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") set(png_STATIC_LIBRARIES - debug ${CMAKE_BINARY_DIR}/png/install/lib/libpng12_staticd.lib - optimized ${CMAKE_BINARY_DIR}/png/install/lib/libpng12_static.lib) + debug ${CMAKE_BINARY_DIR}/png/install/lib/libpng16_staticd.lib + optimized ${CMAKE_BINARY_DIR}/png/install/lib/libpng16_static.lib) else() if(CMAKE_BUILD_TYPE EQUAL Debug) set(png_STATIC_LIBRARIES - ${CMAKE_BINARY_DIR}/png/install/lib/libpng12_staticd.lib) + ${CMAKE_BINARY_DIR}/png/install/lib/libpng16_staticd.lib) else() set(png_STATIC_LIBRARIES - ${CMAKE_BINARY_DIR}/png/install/lib/libpng12_static.lib) + ${CMAKE_BINARY_DIR}/png/install/lib/libpng16_static.lib) endif() endif() else() - set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng12.a) + set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng16.a) endif() set(png_HEADERS - "${png_INSTALL}/include/libpng12/png.h" - "${png_INSTALL}/include/libpng12/pngconf.h" + "${png_INSTALL}/include/libpng16/png.h" + "${png_INSTALL}/include/libpng16/pngconf.h" + "${png_INSTALL}/include/libpng16/pnglibconf.h" ) ExternalProject_Add(png diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake 
b/tensorflow/contrib/cmake/external/sqlite.cmake index 57c4ae7651..7f835d2d51 100644 --- a/tensorflow/contrib/cmake/external/sqlite.cmake +++ b/tensorflow/contrib/cmake/external/sqlite.cmake @@ -15,8 +15,8 @@ include (ExternalProject) set(sqlite_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/sqlite) -set(sqlite_URL https://mirror.bazel.build/www.sqlite.org/2017/sqlite-amalgamation-3200000.zip) -set(sqlite_HASH SHA256=208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4) +set(sqlite_URL https://mirror.bazel.build/www.sqlite.org/2018/sqlite-amalgamation-3230100.zip) +set(sqlite_HASH SHA256=4239a1f69e5721d07d9a374eb84d594225229e54be4ee628da2995f4315d8dfc) set(sqlite_BUILD ${CMAKE_CURRENT_BINARY_DIR}/sqlite/src/sqlite) set(sqlite_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/sqlite/install) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index a1c320347f..b47c32f1c4 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -276,7 +276,7 @@ add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo) add_custom_command(OUTPUT ${VERSION_INFO_CC} COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py - --raw_generate ${VERSION_INFO_CC} + ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE} DEPENDS __force_rebuild) set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) @@ -341,9 +341,3 @@ add_dependencies(tf_core_framework tf_core_lib proto_text ) - -if(WIN32) - # Cmake > 3.6 will quote this as -D"__VERSION__=\"MSVC\"" which nvcc fails on. - # Instead of defining this global, limit it to tf_core_framework where its used. 
- target_compile_definitions(tf_core_framework PRIVATE __VERSION__="MSVC") -endif() diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index f6aaf41f73..c4bdb69d82 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -554,12 +554,13 @@ if(WIN32) set(pywrap_tensorflow_deffile "${CMAKE_CURRENT_BINARY_DIR}/pywrap_tensorflow.def") endif() set_source_files_properties(${pywrap_tensorflow_deffile} PROPERTIES GENERATED TRUE) - + math(EXPR tensorflow_target_bitness "${CMAKE_SIZEOF_VOID_P}*8") add_custom_command(TARGET pywrap_tensorflow_internal_static POST_BUILD COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/tools/create_def_file.py --input "${pywrap_tensorflow_internal_static_dependencies}" --output "${pywrap_tensorflow_deffile}" --target _pywrap_tensorflow_internal.pyd + --bitness "${tensorflow_target_bitness}" BYPRODUCTS ${pywrap_tensorflow_deffile} # Required for Ninja ) endif(WIN32) @@ -589,6 +590,12 @@ add_library(pywrap_tensorflow_internal SHARED ${pywrap_tensorflow_deffile} ) +# There is a bug in GCC 5 resulting in undefined reference to a __cpu_model function when +# linking to the tensorflow library. Adding the following libraries fixes it. 
+if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0) + target_link_libraries(pywrap_tensorflow_internal PRIVATE gcc_s gcc) +endif() + if(WIN32) add_dependencies(pywrap_tensorflow_internal pywrap_tensorflow_internal_static) endif(WIN32) diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 9738bbeb9a..38f40452b5 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -52,12 +52,13 @@ if(WIN32) set(tensorflow_deffile "${CMAKE_CURRENT_BINARY_DIR}/tensorflow.def") endif() set_source_files_properties(${tensorflow_deffile} PROPERTIES GENERATED TRUE) - + math(EXPR tensorflow_target_bitness "${CMAKE_SIZEOF_VOID_P}*8") add_custom_command(TARGET tensorflow_static POST_BUILD COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/tools/create_def_file.py --input "${tensorflow_static_dependencies}" --output "${tensorflow_deffile}" --target tensorflow.dll + --bitness "${tensorflow_target_bitness}" ) endif(WIN32) diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake index 91ca33f4c4..af48ef1fd4 100644 --- a/tensorflow/contrib/cmake/tf_stream_executor.cmake +++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake @@ -65,6 +65,12 @@ if (tensorflow_ENABLE_GPU) file(GLOB tf_stream_executor_gpu_srcs "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc" ) + if (NOT tensorflow_BUILD_CC_TESTS) + file(GLOB tf_stream_executor_gpu_tests + "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*_test.cc" + ) + list(REMOVE_ITEM tf_stream_executor_gpu_srcs ${tf_stream_executor_gpu_tests}) + endif() list(APPEND tf_stream_executor_srcs ${tf_stream_executor_gpu_srcs}) endif() diff --git a/tensorflow/contrib/cmake/tools/create_def_file.py b/tensorflow/contrib/cmake/tools/create_def_file.py index 53c2285699..cffe069aa3 100644 --- a/tensorflow/contrib/cmake/tools/create_def_file.py +++ 
b/tensorflow/contrib/cmake/tools/create_def_file.py @@ -63,7 +63,7 @@ INCLUDE_RE = re.compile(r"^(TF_\w*)$|" r"^(TFE_\w*)$|" r"tensorflow::|" r"functor::|" - r"nsync_|" + r"\?nsync_|" r"perftools::gputools") # We want to identify data members explicitly in the DEF file, so that no one @@ -87,6 +87,7 @@ def get_args(): required=True) parser.add_argument("--output", help="output deffile", required=True) parser.add_argument("--target", help="name of the target", required=True) + parser.add_argument("--bitness", help="build target bitness", required=True) args = parser.parse_args() return args @@ -125,7 +126,10 @@ def main(): # Header for the def file. def_fp.write("LIBRARY " + args.target + "\n") def_fp.write("EXPORTS\n") - def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n") + if args.bitness == "64": + def_fp.write("\t??1OpDef@tensorflow@@UEAA@XZ\n") + else: + def_fp.write("\t??1OpDef@tensorflow@@UAE@XZ\n") # Each symbols returned by undname matches the same position in candidates. # We compare on undname but use the decorated name from candidates. diff --git a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py index 721dc4d080..a5e065b93a 100644 --- a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py +++ b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py @@ -281,6 +281,21 @@ class CrfTest(test.TestCase): self.assertEqual(list(tf_actual_max_sequence[:sequence_lengths]), expected_max_sequence[:sequence_lengths]) + def testCrfDecodeZeroSeqLength(self): + """ + Test that crf_decode works when sequence_length contains one or more zeros. 
+ """ + with self.test_session() as sess: + inputs = constant_op.constant(np.ones([2, 10, 5], + dtype=np.float32)) + transition_params = constant_op.constant(np.ones([5, 5], + dtype=np.float32)) + sequence_lengths = constant_op.constant(np.zeros([2], + dtype=np.int32)) + values = crf.crf_decode(inputs, transition_params, sequence_lengths) + tags, scores = sess.run(values) + self.assertEqual(len(tags.shape), 2) + self.assertEqual(len(scores.shape), 1) if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index 1233c8f251..e37c029ceb 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -479,15 +479,17 @@ def crf_decode(potentials, transition_params, sequence_length): initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1]) initial_state = array_ops.squeeze(initial_state, axis=[1]) # [B, O] inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1]) # [B, T-1, O] + # sequence length is not allowed to be less than zero + sequence_length_less_one = math_ops.maximum(0, sequence_length - 1) backpointers, last_score = rnn.dynamic_rnn( # [B, T - 1, O], [B, O] crf_fwd_cell, inputs=inputs, - sequence_length=sequence_length - 1, + sequence_length=sequence_length_less_one, initial_state=initial_state, time_major=False, dtype=dtypes.int32) backpointers = gen_array_ops.reverse_sequence( # [B, T - 1, O] - backpointers, sequence_length - 1, seq_dim=1) + backpointers, sequence_length_less_one, seq_dim=1) # Computes backward decoding. Extract tag indices from backpointers. 
crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags) @@ -497,7 +499,7 @@ def crf_decode(potentials, transition_params, sequence_length): decode_tags, _ = rnn.dynamic_rnn( # [B, T - 1, 1] crf_bwd_cell, inputs=backpointers, - sequence_length=sequence_length - 1, + sequence_length=sequence_length_less_one, initial_state=initial_state, time_major=False, dtype=dtypes.int32) diff --git a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py index 00d9544602..d58198faf3 100644 --- a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py +++ b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py @@ -358,7 +358,8 @@ class _CudnnRNN(base_layer.Layer): "CUDA/CuDNN generations.") # Initialize opaque params with a tensor. self.kernel = vs.get_variable( - "opaque_kernel", initializer=opaque_params_t, validate_shape=False) + "opaque_kernel", dtype=self._plain_dtype, + initializer=opaque_params_t, validate_shape=False) # Create saveable in the outer scope of the cudnn subgraph, such that # alternative subgraph with platform-independent rnn cells can load the # checkpoints directly. 
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 9d1e8b20c2..d59dd17aea 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -4,7 +4,7 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "py_test", "tf_py_test") +load("//tensorflow:tensorflow.bzl", "cuda_py_test", "py_test", "tf_py_test") py_test( name = "batch_dataset_op_test", @@ -482,12 +482,11 @@ py_test( ], ) -py_test( +cuda_py_test( name = "prefetching_ops_test", size = "small", srcs = ["prefetching_ops_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ "//tensorflow/contrib/data/python/ops:prefetching_ops", "//tensorflow/core:protos_all_py", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py index dbc35097dd..78ecce8f7d 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py @@ -163,7 +163,7 @@ class DatasetSerializationTestBase(test.TestCase): num_outputs, sparse_tensors=False, verify_exhausted=True): - """Verifies that restoring into an already initilized iterator works. + """Verifies that restoring into an already initialized iterator works. Args: ds_fn: See `run_core_tests`. 
diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py index f8556a1b28..43aa4b1bd0 100644 --- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py @@ -409,7 +409,7 @@ class ParallelInterleaveDatasetTest(test.TestCase): def _testTwoThreadsNoContentionWithRaces(self, sloppy=False): """Tests where all the workers race in producing elements. - Note: this is in contrast with the prevous test which carefully sequences + Note: this is in contrast with the previous test which carefully sequences the execution of the map functions. Args: @@ -495,7 +495,7 @@ class ParallelInterleaveDatasetTest(test.TestCase): def _testTwoThreadsNoContentionWithRacesAndBlocking(self, sloppy=False): """Tests where all the workers race in producing elements. - Note: this is in contrast with the prevous test which carefully sequences + Note: this is in contrast with the previous test which carefully sequences the execution of the map functions. @@ -928,8 +928,7 @@ class DirectedInterleaveDatasetTest(test.TestCase): sess.run(next_element) def _normalize(self, vec): - batched = (len(vec.shape) == 2) - return vec / vec.sum(axis=1, keepdims=True) if batched else vec / vec.sum() + return vec / vec.sum() def _chi2(self, expected, actual): actual = np.asarray(actual) @@ -938,35 +937,43 @@ class DirectedInterleaveDatasetTest(test.TestCase): chi2 = np.sum(diff * diff / expected, axis=0) return chi2 + def _testSampleFromDatasetsHelper(self, weights, num_datasets, num_samples): + # Create a dataset that samples each integer in `[0, num_datasets)` + # with probability given by `weights[i]`. 
+ dataset = interleave_ops.sample_from_datasets([ + dataset_ops.Dataset.from_tensors(i).repeat(None) + for i in range(num_datasets) + ], weights) + dataset = dataset.take(num_samples) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + freqs = np.zeros([num_datasets]) + for _ in range(num_samples): + freqs[sess.run(next_element)] += 1 + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + return freqs + def testSampleFromDatasets(self): - random_seed.set_random_seed(1618) + random_seed.set_random_seed(1619) num_samples = 10000 - rand_probs = self._normalize(np.random.random_sample((10,))) - rand_probs2 = self._normalize(np.random.random_sample((15,))) + rand_probs = self._normalize(np.random.random_sample((15,))) - for probs in [[.5, .5], [.85, .05, .1], rand_probs, rand_probs2]: + # Use chi-squared test to assert that the observed distribution matches the + # expected distribution. Based on the implementation in + # "tensorflow/python/kernel_tests/multinomial_op_test.py". + for probs in [[.85, .05, .1], rand_probs]: probs = np.asarray(probs) + classes = len(probs) + freqs = self._testSampleFromDatasetsHelper(probs, classes, num_samples) + self.assertLess(self._chi2(probs, freqs / num_samples), 1e-3) - # Create a dataset that samples each integer in `[0, probs.shape[0])` - # with probability given by `probs[i]`. 
- dataset = interleave_ops.sample_from_datasets([ - dataset_ops.Dataset.from_tensors(i).repeat(None) - for i in range(probs.shape[0]) - ], probs) - dataset = dataset.take(num_samples) - iterator = dataset.make_one_shot_iterator() - next_element = iterator.get_next() - - with self.test_session() as sess: - freqs = np.zeros_like(probs) - for _ in range(num_samples): - freqs[sess.run(next_element)] += 1 - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - # Use chi-squared test to assert that the observed distribution - # matches the expected distribution. Based on the implementation - # in "tensorflow/python/kernel_tests/multinomial_op_test.py". + # Also check that `weights` as a dataset samples correctly. + probs_ds = dataset_ops.Dataset.from_tensors(probs).repeat() + freqs = self._testSampleFromDatasetsHelper(probs_ds, classes, num_samples) self.assertLess(self._chi2(probs, freqs / num_samples), 1e-3) def testErrors(self): diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py index 7acbc676ce..5c74ed6ae7 100644 --- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py @@ -201,6 +201,14 @@ class StatsDatasetSerializationTest( lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply( stats_ops.bytes_produced_stats("bytes_produced")) + def test_bytes_produced_stats_invalid_tag_shape(self): + with self.assertRaisesRegexp( + ValueError, 'Shape must be rank 0 but is rank 1'): + self.run_core_tests( + lambda: dataset_ops.Dataset.range(100).apply( + stats_ops.bytes_produced_stats(["bytes_produced"])), + None, 100) + def testBytesStatsDatasetSaveableCore(self): num_outputs = 100 self.run_core_tests( @@ -218,6 +226,14 @@ class StatsDatasetSerializationTest( return dataset_ops.Dataset.range(num_elements).apply( 
stats_ops.latency_stats(tag1)).apply(stats_ops.latency_stats(tag2)) + def test_latency_stats_invalid_tag_shape(self): + with self.assertRaisesRegexp( + ValueError, 'Shape must be rank 0 but is rank 1'): + self.run_core_tests( + lambda: dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats(["record_latency", "record_latency_2"])), + None, 100) + def testLatencyStatsDatasetSaveableCore(self): num_outputs = 100 diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index 106a1ef388..812a50ecbf 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -200,10 +200,11 @@ def sample_from_datasets(datasets, weights=None, seed=None): Args: datasets: A list of @{tf.data.Dataset} objects with compatible structure. - weights: (Optional.) A list of `len(datasets)` floating-point values, - where `weights[i]` represents the probability with which an element - should be sampled from `datasets[i]`. Defaults to a uniform distribution - across `datasets`. + weights: (Optional.) A list of `len(datasets)` floating-point values where + `weights[i]` represents the probability with which an element should be + sampled from `datasets[i]`, or a @{tf.data.Dataset} object where each + element is such a list. Defaults to a uniform distribution across + `datasets`. seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the random seed that will be used to create the distribution. See @{tf.set_random_seed} for behavior. 
@@ -219,24 +220,23 @@ def sample_from_datasets(datasets, weights=None, seed=None): """ num_datasets = len(datasets) if weights is None: - weights = array_ops.ones( - [num_datasets], dtype=dtypes.float32, name="weights") - else: + weights = dataset_ops.Dataset.from_tensors([1.0] * num_datasets).repeat() + elif not isinstance(weights, dataset_ops.Dataset): weights = ops.convert_to_tensor(weights, name="weights") if weights.dtype not in (dtypes.float32, dtypes.float64): raise TypeError("`weights` must be convertible to a tensor of " "`tf.float32` or `tf.float64` elements.") if not weights.shape.is_compatible_with([num_datasets]): raise ValueError("`weights` must be a vector of length `len(datasets)`.") + weights = dataset_ops.Dataset.from_tensors(weights).repeat() # The `stateless_multinomial()` op expects log-probabilities, as opposed to # weights. - logits = math_ops.log(weights, name="logits") - - def select_dataset(seed): + logits_ds = weights.map(lambda *p: math_ops.log(p, name="logits")) + def select_dataset(logits, seed): return array_ops.squeeze( - stateless.stateless_multinomial([logits], 1, seed=seed), axis=[0, 1]) - - selector_input = random_ops.RandomDataset(seed).batch(2).map(select_dataset) + stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1]) + selector_input = dataset_ops.Dataset.zip( + (logits_ds, random_ops.RandomDataset(seed).batch(2))).map(select_dataset) return DirectedInterleaveDataset(selector_input, datasets) diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py index 89c04dc89a..e4c9f8b58a 100644 --- a/tensorflow/contrib/data/python/ops/prefetching_ops.py +++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py @@ -114,11 +114,13 @@ class _PrefetchToDeviceIterator(object): ret = remote_iterator.get_next() return nest.flatten(sparse.serialize_sparse_tensors(ret)) + iterator_device = gen_dataset_ops.iterator_get_device( + 
self._input_iterator._iterator_resource) + with ops.device(device): self._buffering_resource = function_buffering_resource( f=_prefetch_fn, - target_device=gen_dataset_ops.iterator_get_device( - self._input_iterator._iterator_resource), + target_device=iterator_device, string_arg=input_iterator_handle, buffer_size=buffer_size, shared_name=shared_name) diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 711a538697..60ef7efba4 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -57,7 +57,7 @@ class _ScanDataset(dataset_ops.Dataset): self._output_shapes = None self._output_types = None - # Iteratively rerun the scan function until reaching a fixed pont on + # Iteratively rerun the scan function until reaching a fixed point on # `self._state_shapes`. need_to_rerun = True while need_to_rerun: diff --git a/tensorflow/contrib/distributions/python/kernel_tests/shape_test.py b/tensorflow/contrib/distributions/python/kernel_tests/shape_test.py index c8d795c3f6..243b5a0348 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/shape_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/shape_test.py @@ -584,7 +584,6 @@ class DistributionShapeTest(test.TestCase): def testDistributionShapeGetDimsStatic(self): with self.test_session(): - shaper = _DistributionShape(batch_ndims=0, event_ndims=0) shaper = _DistributionShape(batch_ndims=0, event_ndims=0) x = 1 self.assertAllEqual((_empty_shape, _empty_shape, _empty_shape), diff --git a/tensorflow/contrib/eager/python/saver_test.py b/tensorflow/contrib/eager/python/saver_test.py index 1a7f7b85e6..4032e755f6 100644 --- a/tensorflow/contrib/eager/python/saver_test.py +++ b/tensorflow/contrib/eager/python/saver_test.py @@ -102,7 +102,6 @@ class SaverTest(test.TestCase): # Can still restore it. 
saver.restore(ckpt_prefix) self.assertEqual(v1.read_value().numpy(), 1.0) - self.assertEqual(v1.read_value().numpy(), 1.0) # However, cannot restore it with default name. with self.assertRaisesOpError('not found in checkpoint'): saver = _saver.Saver([v1, v2]).restore(ckpt_prefix) diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index ae2fd8b490..3dcf0374c8 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -485,7 +485,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access reduction=losses.Reduction.NONE) # Averages loss over classes. unweighted_loss = math_ops.reduce_mean( - unweighted_loss, axis=-1, keep_dims=True) + unweighted_loss, axis=-1, keepdims=True) weights = head_lib._get_weights_and_check_match_logits( # pylint:disable=protected-access, features=features, weight_column=self._weight_column, logits=logits) training_loss = losses.compute_weighted_loss( diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index fa2697800e..a8774d6dab 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -456,7 +456,7 @@ def _get_local_devices(device_type): def _split_batch(features, labels, number_of_shards, device): - """Split input features and labes into batches.""" + """Split input features and labels into batches.""" def ensure_divisible_by_shards(sequence): batch_size = ops_lib.convert_to_tensor(sequence).get_shape()[0] @@ -602,7 +602,7 @@ def _local_device_setter(worker_device, ps_devices, ps_strategy): def _scale_tower_loss(tower_spec, loss_reduction, number_of_towers): - """Produce an EstimatorSpec with approproriately scaled loss.""" + """Produce an EstimatorSpec with appropriately scaled 
loss.""" if tower_spec.loss is None: return tower_spec diff --git a/tensorflow/contrib/factorization/python/ops/gmm_ops.py b/tensorflow/contrib/factorization/python/ops/gmm_ops.py index 5d77bc77e1..ccdd679d6a 100644 --- a/tensorflow/contrib/factorization/python/ops/gmm_ops.py +++ b/tensorflow/contrib/factorization/python/ops/gmm_ops.py @@ -54,10 +54,10 @@ def _covariance(x, diag): diagonal matrix just the diagonal is returned. """ num_points = math_ops.to_float(array_ops.shape(x)[0]) - x -= math_ops.reduce_mean(x, 0, keep_dims=True) + x -= math_ops.reduce_mean(x, 0, keepdims=True) if diag: cov = math_ops.reduce_sum( - math_ops.square(x), 0, keep_dims=True) / (num_points - 1) + math_ops.square(x), 0, keepdims=True) / (num_points - 1) else: cov = math_ops.matmul(x, x, transpose_a=True) / (num_points - 1) return cov @@ -313,7 +313,7 @@ class GmmAlgorithm(object): # TODO(xavigonzalvo): look into alternatives to log for # reparametrization of variance parameters. det_expanded = math_ops.reduce_sum( - math_ops.log(self._covs + 1e-3), 1, keep_dims=True) + math_ops.log(self._covs + 1e-3), 1, keepdims=True) diff = shard - self._means x2 = math_ops.square(diff) cov_expanded = array_ops.expand_dims(1.0 / (self._covs + 1e-3), 2) @@ -351,7 +351,7 @@ class GmmAlgorithm(object): shard_id: id of current shard_id. """ self._prior_probs[shard_id] = math_ops.reduce_logsumexp( - self._probs[shard_id], axis=1, keep_dims=True) + self._probs[shard_id], axis=1, keepdims=True) def _define_expectation_operation(self, shard_id): # Shape broadcasting. @@ -375,7 +375,7 @@ class GmmAlgorithm(object): """ # Soft assignment of each data point to each of the two clusters. self._points_in_k[shard_id] = math_ops.reduce_sum( - self._w[shard_id], 0, keep_dims=True) + self._w[shard_id], 0, keepdims=True) # Partial means. 
w_mul_x = array_ops.expand_dims( math_ops.matmul( @@ -454,7 +454,7 @@ class GmmAlgorithm(object): for shard_id, prior_probs in enumerate(self._prior_probs): op.append(prior_probs + math_ops.log(self._w[shard_id])) self._scores = array_ops.squeeze( - math_ops.reduce_logsumexp(op, axis=2, keep_dims=True), axis=0) + math_ops.reduce_logsumexp(op, axis=2, keepdims=True), axis=0) def gmm(inp, diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py index bfe338c9f9..9ffdd3ba5e 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans.py @@ -374,11 +374,11 @@ class KMeansClustering(estimator.Estimator): than `num_clusters`, a TensorFlow runtime error occurs. distance_metric: The distance metric used for clustering. One of: * `KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE`: Euclidean distance - between vectors `u` and `v` is defined as `\\(||u - v||_2\\)` + between vectors `u` and `v` is defined as \\(||u - v||_2\\) which is the square root of the sum of the absolute squares of the elements' difference. * `KMeansClustering.COSINE_DISTANCE`: Cosine distance between vectors - `u` and `v` is defined as `\\(1 - (u . v) / (||u||_2 ||v||_2)\\)`. + `u` and `v` is defined as \\(1 - (u . v) / (||u||_2 ||v||_2)\\). random_seed: Python integer. Seed for PRNG used to initialize centers. use_mini_batch: A boolean specifying whether to use the mini-batch k-means algorithm. See explanation above. 
diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index bb4f1eb384..11397e86bd 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -118,12 +118,13 @@ from tensorflow.python.framework.smart_cond import smart_cond from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec +from tensorflow.python.ops.array_ops import broadcast_to from tensorflow.python.ops.init_ops import convolutional_delta_orthogonal from tensorflow.python.ops.init_ops import convolutional_orthogonal_1d from tensorflow.python.ops.init_ops import convolutional_orthogonal_2d from tensorflow.python.ops.init_ops import convolutional_orthogonal_3d from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ['nest'] +_allowed_symbols = ['nest', 'broadcast_to'] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/framework/python/framework/tensor_util_test.py b/tensorflow/contrib/framework/python/framework/tensor_util_test.py index a2834b6489..8fc4f60492 100644 --- a/tensorflow/contrib/framework/python/framework/tensor_util_test.py +++ b/tensorflow/contrib/framework/python/framework/tensor_util_test.py @@ -48,7 +48,7 @@ class LocalVariabletest(test.TestCase): variables = variables_lib.local_variables() self.assertEquals(2, len(variables)) self.assertRaises(errors_impl.OpError, sess.run, variables) - variables_lib.initialize_variables(variables).run() + variables_lib.variables_initializer(variables).run() self.assertAllEqual(set([value0, value1]), set(sess.run(variables))) diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py index a97adf622e..983b6dc8e5 100644 --- 
a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py @@ -65,7 +65,7 @@ def fused_conv2d_bias_activation(conv_input, side_input_scale: A scalar `float32` that will be multiplied by side_input. This is optional and defaults to 0. side_input: A `Tensor` of the format specified by `data_format`. - This is useful for imlementing ResNet blocks. + This is useful for implementing ResNet blocks. activation_mode: (optional) currently must be the default "Relu". Note that in qint8 mode, it also clips to 127, so acts like ReluX. data_format: Specifies the data format. diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py index bb155aa249..3d0ed89932 100644 --- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py @@ -566,7 +566,7 @@ def GetInceptionFwdTest(input_size, filter_size, stride, padding, return Test -def CalculateCovolvedOutputDim(input_dim, filter_dim, stride, padding_type): +def CalculateConvolvedOutputDim(input_dim, filter_dim, stride, padding_type): """Calculates the size of an output dimension of a strided convolution. 
Given the sizes of the corresponding dimension of the input and filter shapes, @@ -827,10 +827,10 @@ class FusedConvInt8Tests(test.TestCase): maxval=1.0, dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8) - output_height = CalculateCovolvedOutputDim(input_height, filter_height, - vertical_stride, padding_type) - output_width = CalculateCovolvedOutputDim(input_width, filter_width, - horizontal_stride, padding_type) + output_height = CalculateConvolvedOutputDim(input_height, filter_height, + vertical_stride, padding_type) + output_width = CalculateConvolvedOutputDim(input_width, filter_width, + horizontal_stride, padding_type) print("output_height=", output_height, ", output_width=", output_width) side_input, _, _ = gen_array_ops.quantize_v2( diff --git a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py index 4b10bc0f8e..4b1105f6bd 100644 --- a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py @@ -161,7 +161,7 @@ def _sliced_wasserstein(a, b, random_sampling_count, random_projection_dim): proj = random_ops.random_normal( [array_ops.shape(a)[1], random_projection_dim]) proj *= math_ops.rsqrt( - math_ops.reduce_sum(math_ops.square(proj), 0, keep_dims=True)) + math_ops.reduce_sum(math_ops.square(proj), 0, keepdims=True)) # Project both distributions and sort them. 
proj_a = math_ops.matmul(a, proj) proj_b = math_ops.matmul(b, proj) diff --git a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_impl.py b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_impl.py index f8b372546b..650eab97a3 100644 --- a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_impl.py +++ b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_impl.py @@ -64,11 +64,11 @@ def _statistics(x, axes): y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x # Compute true mean while keeping the dims for proper broadcasting. - shift = array_ops.stop_gradient(math_ops.reduce_mean(y, axes, keep_dims=True)) + shift = array_ops.stop_gradient(math_ops.reduce_mean(y, axes, keepdims=True)) - shifted_mean = math_ops.reduce_mean(y - shift, axes, keep_dims=True) + shifted_mean = math_ops.reduce_mean(y - shift, axes, keepdims=True) mean = shifted_mean + shift - mean_squared = math_ops.reduce_mean(math_ops.square(y), axes, keep_dims=True) + mean_squared = math_ops.reduce_mean(math_ops.square(y), axes, keepdims=True) mean = array_ops.squeeze(mean, axes) mean_squared = array_ops.squeeze(mean_squared, axes) diff --git a/tensorflow/contrib/hvx/README.md b/tensorflow/contrib/hvx/README.md index 163993a3f6..68e34f3b09 100644 --- a/tensorflow/contrib/hvx/README.md +++ b/tensorflow/contrib/hvx/README.md @@ -42,11 +42,12 @@ If you've finished walking through the quick start guide, you may want to try bu ### Build libhexagon\_nn\_skel.so -Download Hexagon NN library from codeaurora.org and build it. +Download Hexagon NN library from codeaurora.org and build it. For Hexagon SDK 3.0, we need use the compatible version([721b2d58f](https://source.codeaurora.org/quic/hexagon_nn/nnlib/commit/?id=721b2d58f0f4e2d5b182f41e6b7c4db5356bf0fb)) of nnlib. ```shell git clone https://source.codeaurora.org/quic/hexagon_nn/nnlib cd nnlib +git reset 721b2d58f --hard ``` Just follow the instructions in `README.HOW_TO_BUILD`. 
You can find the file `libhexagon_nn_skel.so` in `hexagon_Release_dynamic_toolv72_v60/ship`. diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc index 1be97ae3d6..bbb3a3b18f 100644 --- a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc +++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc @@ -53,7 +53,7 @@ void AdjustHsvInYiqGPU::operator()(OpKernelContext* ctx, int channel_count, OP_REQUIRES_OK(ctx, ctx->allocate_temp( DT_FLOAT, TensorShape({kChannelSize * kChannelSize}), &tranformation_matrix)); - // TODO(huangyp): It takes about 3.5 us to comute tranformation_matrix + // TODO(huangyp): It takes about 3.5 us to compute tranformation_matrix // with one thread. Improve its performance if necessary. internal::compute_tranformation_matrix_cuda<<<1, 1, 0, cu_stream>>>( delta_h, scale_s, scale_v, tranformation_matrix.flat().data(), diff --git a/tensorflow/contrib/image/ops/distort_image_ops.cc b/tensorflow/contrib/image/ops/distort_image_ops.cc index b169b0b2b2..ca49635d5d 100644 --- a/tensorflow/contrib/image/ops/distort_image_ops.cc +++ b/tensorflow/contrib/image/ops/distort_image_ops.cc @@ -36,9 +36,9 @@ REGISTER_OP("AdjustHsvInYiq") Adjust the YIQ hue of one or more images. `images` is a tensor of at least 3 dimensions. The last dimension is -interpretted as channels, and must be three. +interpreted as channels, and must be three. -We used linear transfomation described in: +We used linear transformation described in: beesbuzz.biz/code/hsv_color_transforms.php The input image is considered in the RGB colorspace. 
Conceptually, the RGB colors are first mapped into YIQ space, rotated around the Y channel by diff --git a/tensorflow/contrib/image/ops/image_ops.cc b/tensorflow/contrib/image/ops/image_ops.cc index e97267fb89..295908d44b 100644 --- a/tensorflow/contrib/image/ops/image_ops.cc +++ b/tensorflow/contrib/image/ops/image_ops.cc @@ -137,7 +137,7 @@ row_to_col_match_indices: A vector of length num_rows, which is the number of If `row_to_col_match_indices[i]` is not -1, row i is matched to column `row_to_col_match_indices[i]`. col_to_row_match_indices: A vector of length num_columns, which is the number - of columns of the input ditance matrix. + of columns of the input distance matrix. If `col_to_row_match_indices[j]` is not -1, column j is matched to row `col_to_row_match_indices[j]`. )doc"); diff --git a/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc b/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc index 8139d4272d..bd784c6bda 100755 --- a/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc +++ b/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc @@ -69,7 +69,7 @@ Outputs a single image random dot stereogram for export via encode_PNG/JPG OP. Given the 2-D tensor 'depth_values' with encoded Z values, this operation will encode 3-D data into a 2-D image. The output of this Op is suitable for the encode_PNG/JPG ops. Be careful with image compression as this may corrupt the -encode 3-D data witin the image. +encode 3-D data within the image. 
This Op is based upon: 'http://www.learningace.com/doc/4331582/b6ab058d1e206d68ab60e4e1ead2fe6e/sirds-paper' @@ -111,7 +111,7 @@ output_image_shape: Output size of returned image in X,Y, Channels 1-grayscale, output_data_window: Size of "DATA" window, must be equal to or smaller than 'output_image_shape', will be centered and use 'convergence_dots_size' for best fit to avoid overlap if possible -image:= A tensor of size 'output_image_shape' with the encloded 'depth_values' +image:= A tensor of size 'output_image_shape' with the encoded 'depth_values' )doc"); } // namespace tensorflow diff --git a/tensorflow/contrib/image/python/ops/image_ops.py b/tensorflow/contrib/image/python/ops/image_ops.py index a8d8cf8c5c..d3c114a88d 100644 --- a/tensorflow/contrib/image/python/ops/image_ops.py +++ b/tensorflow/contrib/image/python/ops/image_ops.py @@ -438,7 +438,7 @@ def bipartite_match(distance_mat, of rows of the input `distance_matrix`. If `row_to_col_match_indices[i]` is not -1, row i is matched to column `row_to_col_match_indices[i]`. col_to_row_match_indices: A vector of length num_columns, which is the - number of columns of the input ditance matrix. + number of columns of the input distance matrix. If `col_to_row_match_indices[j]` is not -1, column j is matched to row `col_to_row_match_indices[j]`. """ diff --git a/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py b/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py index d4a6a5bcbb..0ceb683ff4 100755 --- a/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py +++ b/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py @@ -45,7 +45,7 @@ def single_image_random_dot_stereograms(depth_values, Given the 2-D tensor 'depth_values' with encoded Z values, this operation will encode 3-D data into a 2-D image. The output of this Op is suitable for the encode_PNG/JPG ops. 
Be careful with image compression as this may - corrupt the encode 3-D data witin the image. + corrupt the encode 3-D data within the image. Based upon [this paper](http://www.learningace.com/doc/4331582/b6ab058d1e206d68ab60e4e1ead2fe6e/sirds-paper). diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions.py b/tensorflow/contrib/kfac/python/ops/loss_functions.py index e7d4243fc3..42d525c2c2 100644 --- a/tensorflow/contrib/kfac/python/ops/loss_functions.py +++ b/tensorflow/contrib/kfac/python/ops/loss_functions.py @@ -613,19 +613,19 @@ class CategoricalLogitsNegativeLogProbLoss(DistributionNegativeLogProbLoss, def multiply_fisher(self, vector): probs = self._probs return vector * probs - probs * math_ops.reduce_sum( - vector * probs, axis=-1, keep_dims=True) + vector * probs, axis=-1, keepdims=True) def multiply_fisher_factor(self, vector): probs = self._probs sqrt_probs = self._sqrt_probs return sqrt_probs * vector - probs * math_ops.reduce_sum( - sqrt_probs * vector, axis=-1, keep_dims=True) + sqrt_probs * vector, axis=-1, keepdims=True) def multiply_fisher_factor_transpose(self, vector): probs = self._probs sqrt_probs = self._sqrt_probs return sqrt_probs * vector - sqrt_probs * math_ops.reduce_sum( - probs * vector, axis=-1, keep_dims=True) + probs * vector, axis=-1, keepdims=True) def multiply_fisher_factor_replicated_one_hot(self, index): assert len(index) == 1, "Length of index was {}".format(len(index)) diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py b/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py index 705a871d48..4279cb2792 100644 --- a/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py +++ b/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py @@ -33,7 +33,6 @@ _allowed_symbols = [ "CategoricalLogitsNegativeLogProbLoss", "OnehotCategoricalLogitsNegativeLogProbLoss", "MultiBernoulliNegativeLogProbLoss", - "MultiBernoulliNegativeLogProbLoss", "insert_slice_in_zeros", ] diff --git 
a/tensorflow/contrib/labeled_tensor/python/ops/ops_test.py b/tensorflow/contrib/labeled_tensor/python/ops/ops_test.py index 0727f4cf88..39e9d65407 100644 --- a/tensorflow/contrib/labeled_tensor/python/ops/ops_test.py +++ b/tensorflow/contrib/labeled_tensor/python/ops/ops_test.py @@ -660,7 +660,7 @@ class ReduceSumTest(Base): sum_lt = ops.reduce_sum(self.original_lt, {('channel', 'hihowareyou')}) golden_lt = core.LabeledTensor( math_ops.reduce_sum( - self.original_lt.tensor, 1, keep_dims=True), + self.original_lt.tensor, 1, keepdims=True), [self.a0, ('channel', ['hihowareyou']), self.a2, self.a3]) self.assertLabeledTensorsEqual(sum_lt, golden_lt) @@ -668,7 +668,7 @@ class ReduceSumTest(Base): sum_lt = ops.reduce_sum(self.original_lt, ('channel', 'hihowareyou')) golden_lt = core.LabeledTensor( math_ops.reduce_sum( - self.original_lt.tensor, 1, keep_dims=True), + self.original_lt.tensor, 1, keepdims=True), [self.a0, ('channel', ['hihowareyou']), self.a2, self.a3]) self.assertLabeledTensorsEqual(sum_lt, golden_lt) diff --git a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py index f701647c2b..28ddaa69a1 100644 --- a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py +++ b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py @@ -200,7 +200,7 @@ class SparseCrossOpTest(test.TestCase): self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_large_batch(self): - """Tests with large batch size to force multithreding. + """Tests with large batch size to force multithreading. 
""" batch_size = 5000 col1 = [] diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py index 9ccb589d69..3ae07cedab 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column.py +++ b/tensorflow/contrib/layers/python/layers/feature_column.py @@ -48,7 +48,7 @@ you should choose depends on (1) the feature type and (2) the model type. recommended. embedded_dept_column = embedding_column( - sparse_column_with_keys("department", ["math", "philosphy", ...]), + sparse_column_with_keys("department", ["math", "philosophy", ...]), dimension=10) * Wide (aka linear) models (`LinearClassifier`, `LinearRegressor`). diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops.py b/tensorflow/contrib/layers/python/layers/feature_column_ops.py index 78affea44c..06060b99e7 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column_ops.py +++ b/tensorflow/contrib/layers/python/layers/feature_column_ops.py @@ -815,7 +815,7 @@ class _Transformer(object): """ def __init__(self, columns_to_tensors): - """Initializes transfomer. + """Initializes transformer. Args: columns_to_tensors: A mapping from feature columns to tensors. 
'string' @@ -908,7 +908,7 @@ def _gather_feature_columns(feature_columns): def _check_forbidden_sequence_columns(feature_columns): - """Recursively cecks `feature_columns` for `_FORBIDDEN_SEQUENCE_COLUMNS`.""" + """Recursively checks `feature_columns` for `_FORBIDDEN_SEQUENCE_COLUMNS`.""" all_feature_columns = _gather_feature_columns(feature_columns) for feature_column in all_feature_columns: if isinstance(feature_column, _FORBIDDEN_SEQUENCE_COLUMNS): diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 25c3b1e7ea..2f3e57653c 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -932,7 +932,8 @@ def convolution(inputs, variables_collections=None, outputs_collections=None, trainable=True, - scope=None): + scope=None, + conv_dims=None): """Adds an N-D convolution followed by an optional batch_norm layer. It is required that 1 <= N <= 3. @@ -993,6 +994,10 @@ def convolution(inputs, trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_scope`. + conv_dims: Optional convolution dimensionality, when set it would use the + corresponding convolution (e.g. 2 for Conv 2D, 3 for Conv 3D, ..). When + leaved to None it would select the convolution dimensionality based on + the input rank (i.e. Conv ND, with N = input_rank - 2). Returns: A tensor representing the output of the operation. 
@@ -1015,6 +1020,9 @@ def convolution(inputs, inputs = ops.convert_to_tensor(inputs) input_rank = inputs.get_shape().ndims + if conv_dims is not None and conv_dims + 2 != input_rank: + raise ValueError('Convolution expects input with rank %d, got %d' % + (conv_dims + 2, input_rank)) if input_rank == 3: layer_class = convolutional_layers.Convolution1D elif input_rank == 4: @@ -1061,10 +1069,134 @@ def convolution(inputs, outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs) +@add_arg_scope +def convolution1d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=1) + +convolution1d.__doc__ = convolution.__doc__ -convolution2d = convolution -convolution3d = convolution +@add_arg_scope +def convolution2d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + 
padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=2) + +convolution2d.__doc__ = convolution.__doc__ +@add_arg_scope +def convolution3d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=3) + +convolution3d.__doc__ = convolution.__doc__ @add_arg_scope def convolution2d_in_plane( @@ -1411,7 +1543,7 @@ def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None): Args: tensor: An `int` `Tensor` to be converted to a `Sparse`. eos_token: An integer. - It is part of the target label that signfies the end of a sentence. + It is part of the target label that signifies the end of a sentence. outputs_collections: Collection to add the outputs. scope: Optional scope for name_scope. """ @@ -1555,7 +1687,7 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None): output_collections: Collection to which the outputs will be added. scope: Optional scope for `name_scope`. 
Returns: - A `Tensor` or `SparseTensor` conataining the same values as `inputs`, but + A `Tensor` or `SparseTensor` containing the same values as `inputs`, but with innermost dimensions flattened to obtain rank `new_rank`. Raises: diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 997f910a2a..b01fd5d5c9 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -310,6 +310,17 @@ class BiasAddTest(test.TestCase): class ConvolutionTest(test.TestCase): + def testInvalidShape(self): + with self.test_session(): + images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1) + with self.assertRaisesRegexp( + ValueError, 'Convolution expects input with rank 5, got 4'): + layers_lib.convolution3d(images_2d, 32, 3) + images_3d = random_ops.random_uniform((5, 6, 7, 9, 3), seed=1) + with self.assertRaisesRegexp( + ValueError, 'Convolution expects input with rank 4, got 5'): + layers_lib.convolution2d(images_3d, 32, 3) + def testInvalidDataFormat(self): height, width = 7, 9 with self.test_session(): @@ -3155,7 +3166,7 @@ class RepeatTests(test.TestCase): with self.test_session(): images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32) output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3]) - self.assertEqual(output.op.name, 'Repeat/convolution_3/Relu') + self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu') self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 32]) def testRepeatWithScope(self): @@ -3749,7 +3760,7 @@ class StackTests(test.TestCase): layers_lib.convolution2d, [10, 20, 30], kernel_size=[3, 3], padding='SAME') - self.assertEqual(output.op.name, 'Stack/convolution_3/Relu') + self.assertEqual(output.op.name, 'Stack/convolution2d_3/Relu') self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 30]) def testStackWithScope(self): diff --git 
a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py index 392a490be1..8c118402a4 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py @@ -60,8 +60,8 @@ class RevBlockTest(test.TestCase): sess.run(variables.global_variables_initializer()) x1, x2, x1_inv, x2_inv = sess.run([x1, x2, x1_inv, x2_inv]) - self.assertAllClose(x1, x1_inv) - self.assertAllClose(x2, x2_inv) + self.assertAllClose(x1, x1_inv, atol=1e-5) + self.assertAllClose(x2, x2_inv, atol=1e-5) def testBackwardForward(self): diff --git a/tensorflow/contrib/layers/python/layers/utils_test.py b/tensorflow/contrib/layers/python/layers/utils_test.py index 3409860add..645dc1291e 100644 --- a/tensorflow/contrib/layers/python/layers/utils_test.py +++ b/tensorflow/contrib/layers/python/layers/utils_test.py @@ -294,7 +294,6 @@ class NPositiveIntegersTest(test.TestCase): self.assertEqual(utils.n_positive_integers(2, 2), (2, 2)) self.assertEqual(utils.n_positive_integers(2, (2, 3)), (2, 3)) self.assertEqual(utils.n_positive_integers(3, (2, 3, 1)), (2, 3, 1)) - self.assertEqual(utils.n_positive_integers(3, (2, 3, 1)), (2, 3, 1)) self.assertEqual( utils.n_positive_integers(3, tensor_shape.TensorShape([2, 3, 1])), (2, 3, 1)) diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py index b28835a809..584556992a 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py @@ -36,7 +36,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import random_ops from tensorflow.python.platform import benchmark from 
tensorflow.python.platform import flags from tensorflow.python.platform import test diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py index 8c85c431be..14ee2ba609 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py @@ -299,6 +299,7 @@ class RunConfig(ClusterConfig, core_run_config.RunConfig): # so instead of breaking compatibility with that assumption, we # just manually initialize this field: self._train_distribute = None + self._device_fn = None gpu_options = config_pb2.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index b4504f246a..65fba52d46 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -90,7 +90,8 @@ $(wildcard tensorflow/contrib/lite/kernels/*.c) \ $(wildcard tensorflow/contrib/lite/kernels/internal/*.c) \ $(wildcard tensorflow/contrib/lite/kernels/internal/optimized/*.c) \ $(wildcard tensorflow/contrib/lite/kernels/internal/reference/*.c) \ -$(wildcard tensorflow/contrib/lite/downloads/farmhash/src/farmhash.cc) +$(wildcard tensorflow/contrib/lite/downloads/farmhash/src/farmhash.cc) \ +$(wildcard tensorflow/contrib/lite/downloads/fft2d/fftsg.c) # Remove any duplicates. CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) CORE_CC_EXCLUDE_SRCS := \ diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh index a93ed201d6..436c3e1d4c 100755 --- a/tensorflow/contrib/lite/download_dependencies.sh +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -30,12 +30,15 @@ if [ ! 
-f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once +# the archive has been propagated in mirror.bazel.build. +GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip" FARMHASH_URL="https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz" FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/master.zip" +FFT2D_URL="https://mirror.bazel.build/www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz" # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64, # so work around it by patching the source. 
@@ -91,6 +94,7 @@ download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl" download_and_extract "${NEON_2_SSE_URL}" "${DOWNLOADS_DIR}/neon_2_sse" download_and_extract "${FARMHASH_URL}" "${DOWNLOADS_DIR}/farmhash" download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers" +download_and_extract "${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d" replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \ "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h" diff --git a/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj b/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj index b0236e9c60..98d3b5bb8a 100644 --- a/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj +++ b/tensorflow/contrib/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj @@ -326,10 +326,6 @@ GCC_WARN_UNUSED_VARIABLE = YES; HEADER_SEARCH_PATHS = ( "$(inherited)", - ../../../../../../, - ../../../downloads/flatbuffers/include/, - ../../../downloads/eigen/, - ../../../downloads/, ); IPHONEOS_DEPLOYMENT_TARGET = 8.0; MTL_ENABLE_DEBUG_INFO = YES; @@ -373,10 +369,6 @@ GCC_WARN_UNUSED_VARIABLE = YES; HEADER_SEARCH_PATHS = ( "$(inherited)", - ../../../../../../, - ../../../downloads/flatbuffers/include/, - ../../../downloads/eigen/, - ../../../downloads/, ); IPHONEOS_DEPLOYMENT_TARGET = 8.0; MTL_ENABLE_DEBUG_INFO = NO; diff --git a/tensorflow/contrib/lite/g3doc/apis.md b/tensorflow/contrib/lite/g3doc/apis.md index fe208e47d1..50cc146a87 100644 --- a/tensorflow/contrib/lite/g3doc/apis.md +++ b/tensorflow/contrib/lite/g3doc/apis.md @@ -29,7 +29,7 @@ interpreter->AllocateTensors(); float* input = interpreter->typed_input_tensor(0); // Fill `input`. 
interpreter->Invoke(); -float* output = interpreter->type_output_tensor(0); +float* output = interpreter->typed_output_tensor(0); ``` ### Data Alignment diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java index 300786c3ca..18f6465188 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java @@ -54,6 +54,9 @@ import android.view.Surface; import android.view.TextureView; import android.view.View; import android.view.ViewGroup; +import android.widget.CompoundButton; +import android.widget.NumberPicker; +import android.widget.ToggleButton; import android.widget.TextView; import android.widget.Toast; import java.io.IOException; @@ -82,6 +85,8 @@ public class Camera2BasicFragment extends Fragment private boolean runClassifier = false; private boolean checkedPermissions = false; private TextView textView; + private ToggleButton toggle; + private NumberPicker np; private ImageClassifier classifier; /** Max preview width that is guaranteed by Camera2 API */ @@ -289,6 +294,24 @@ public class Camera2BasicFragment extends Fragment public void onViewCreated(final View view, Bundle savedInstanceState) { textureView = (AutoFitTextureView) view.findViewById(R.id.texture); textView = (TextView) view.findViewById(R.id.text); + toggle = (ToggleButton) view.findViewById(R.id.button); + + toggle.setOnCheckedChangeListener(new CompoundButton.OnCheckedChangeListener() { + public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) { + classifier.setUseNNAPI(isChecked); + } + }); + + np = (NumberPicker) view.findViewById(R.id.np); + np.setMinValue(1); + np.setMaxValue(10); + 
np.setWrapSelectorWheel(true); + np.setOnValueChangedListener(new NumberPicker.OnValueChangeListener() { + @Override + public void onValueChange(NumberPicker picker, int oldVal, int newVal){ + classifier.setNumThreads(newVal); + } + }); } /** Load the model and labels. */ diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java index c57bb348c5..d32c077910 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java @@ -142,6 +142,16 @@ public abstract class ImageClassifier { } } + public void setUseNNAPI(Boolean nnapi) { + if (tflite != null) + tflite.setUseNNAPI(nnapi); + } + + public void setNumThreads(int num_threads) { + if (tflite != null) + tflite.setNumThreads(num_threads); + } + /** Closes tflite to release resources. 
*/ public void close() { tflite.close(); diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml index 15305c436e..db557ad62f 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml @@ -22,24 +22,59 @@ android:layout_width="wrap_content" android:layout_height="wrap_content" android:layout_alignParentStart="true" + android:layout_alignParentLeft="true" android:layout_alignParentTop="true" /> - + + + + + + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml index a08ec3eb62..29a033bcd4 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/values/strings.xml @@ -21,4 +21,6 @@ NN:On NN:Off Use NNAPI + tflite + NNAPI diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index e915e65aa1..e84ee71129 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -215,6 +215,13 @@ public final class Interpreter implements AutoCloseable { } } + public void setNumThreads(int num_threads) { + if (wrapper == null) { + throw new IllegalStateException("The interpreter has already been closed."); + } + wrapper.setNumThreads(num_threads); + } + /** Release resources associated with the {@code Interpreter}. 
*/ @Override public void close() { diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java index dfc8ac111a..2fc803715b 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java @@ -153,6 +153,10 @@ final class NativeInterpreterWrapper implements AutoCloseable { useNNAPI(interpreterHandle, useNNAPI); } + void setNumThreads(int num_threads) { + numThreads(interpreterHandle, num_threads); + } + /** Gets index of an input given its name. */ int getInputIndex(String name) { if (inputsIndexes == null) { @@ -324,6 +328,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { private static native void useNNAPI(long interpreterHandle, boolean state); + private static native void numThreads(long interpreterHandle, int num_threads); + private static native long createErrorReporter(int size); private static native long createModel(String modelPathOrBuffer, long errorHandle); diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index ccfdfd829b..45f510da1d 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -320,6 +320,16 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, interpreter->UseNNAPI(static_cast(state)); } +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_numThreads(JNIEnv* env, + jclass clazz, + jlong handle, + jint num_threads) { + tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle); + if (interpreter == nullptr) return; + 
interpreter->SetNumThreads(static_cast(num_threads)); +} + JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_createErrorReporter( JNIEnv* env, jclass clazz, jint size) { diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h index 0e28a77fee..eaa765cb34 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h @@ -61,7 +61,7 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputNames(JNIEnv* env, /* * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: - * Signature: (JZ) + * Signature: (JZ)V */ JNIEXPORT void JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, @@ -69,6 +69,16 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, jlong handle, jboolean state); +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (JI)V + */ +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_numThreads(JNIEnv* env, + jclass clazz, + jlong handle, + jint num_threads); /* * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: diff --git a/tensorflow/contrib/lite/kernels/add.cc b/tensorflow/contrib/lite/kernels/add.cc index 63ea89df56..e0aa070e2d 100644 --- a/tensorflow/contrib/lite/kernels/add.cc +++ b/tensorflow/contrib/lite/kernels/add.cc @@ -176,7 +176,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { output); } else { context->ReportError(context, - "Inputs and outputs not all float|unit8 types."); + "Inputs and outputs not all float|uint8 types."); return kTfLiteError; } diff --git a/tensorflow/contrib/lite/kernels/div.cc b/tensorflow/contrib/lite/kernels/div.cc index 6dd243ad62..ec380c8e49 100644 --- a/tensorflow/contrib/lite/kernels/div.cc +++ b/tensorflow/contrib/lite/kernels/div.cc @@ 
-106,6 +106,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, #undef TF_LITE_DIV } + + template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); @@ -118,7 +120,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { if (output->type == kTfLiteFloat32) { EvalFloat(context, node, params, data, input1, input2, output); } else { - context->ReportError(context, "Inputs and outputs not all float types."); + context->ReportError(context, + "Div only supports FLOAT32 and quantized UINT8 now."); return kTfLiteError; } diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index d585bcca0e..9e9aba0169 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -4374,7 +4374,7 @@ inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, using FixedPointAccum = gemmlowp::FixedPoint; using FixedPoint0 = gemmlowp::FixedPoint; - gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); +gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); const int height = MatchingArraySize(input_dims, 2, output_dims, 2); const int width = MatchingArraySize(input_dims, 1, output_dims, 1); diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index ae295cc8b5..4c8cbe4275 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1403,6 +1403,33 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, output_data, output_dims); } +inline void Div(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, 
const Dims<4>& input2_dims, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims) { + const int batches = + MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3); + const int height = + MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2); + const int width = + MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[Offset(input1_dims, c, x, y, b)] / + input2_data[Offset(input2_dims, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + // TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary // dimensionality if the runtime code does a single loop over one dimension // that handles broadcasting as the base case. 
The code generator would then @@ -1444,18 +1471,6 @@ void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, } } -inline void Div(const float* input1_data, const Dims<4>& input1_dims, - const float* input2_data, const Dims<4>& input2_dims, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims) { - const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = ActivationFunctionWithMinMax( - input1_data[i] / input2_data[i], output_activation_min, - output_activation_max); - } -} - inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, float output_activation_min, float output_activation_max, diff --git a/tensorflow/contrib/lite/kernels/sub.cc b/tensorflow/contrib/lite/kernels/sub.cc index 66b06aeaec..7c60a4fdbf 100644 --- a/tensorflow/contrib/lite/kernels/sub.cc +++ b/tensorflow/contrib/lite/kernels/sub.cc @@ -174,7 +174,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { EvalQuantized(context, node, params, data, input1, input2, output); } else { - context->ReportError(context, "Inputs and outputs not all float types."); + context->ReportError(context, + "Inputs and outputs not all float|uint8 types."); return kTfLiteError; } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc index 477e7f13da..38e0005890 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc @@ -32,7 +32,7 @@ bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) { } // We need to yield until this Merge node has only 1 input, which will mean - // that that is the selected input. 
Other graph transformations on other nodes + // that is the selected input. Other graph transformations on other nodes // such as ResolveTensorFlowSwitch, will take care of trimming the // non-selected inputs, so that at some point there will be only 1 input left. if (merge_op->inputs.size() > 1) { diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 705a9d69a6..482cc71d8b 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -152,9 +152,9 @@ enum class AxesOrder { }; // The type of the scalars in an array. -// Note that that does not by itself tell whether the values in the array are -// real (are literally interpreted as real numbers) or quantized (only acquire -// a meaning as real numbers in conjunction with QuantizationParams). +// Note that the type does not by itself tell whether the values in the array +// are real (are literally interpreted as real numbers) or quantized (only +// acquire a meaning as real numbers in conjunction with QuantizationParams). // // In practice though: // float values are always real diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 8c3a8afe7a..bdad34a665 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -29,6 +29,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util.deprecation import deprecated_argument_lookup __all__ = [ "absolute_difference", "add_loss", "cosine_distance", @@ -651,11 +652,9 @@ def cosine_distance(predictions, ValueError: If `predictions` shape doesn't match `labels` shape, or `weights` is `None`. 
""" - if dim is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dim'") - axis = dim - if axis is None and dim is None: + axis = deprecated_argument_lookup( + "axis", axis, "dim", dim) + if axis is None: raise ValueError("You must specify 'axis'.") with ops.name_scope(scope, "cosine_distance_loss", [predictions, labels, weights]) as scope: diff --git a/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py index 2b9eee4ef7..de76acb51f 100644 --- a/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py +++ b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops.py @@ -711,7 +711,7 @@ def _find_loss_augmented_facility_idx(pairwise_distances, labels, chosen_ids, candidate_scores, margin_multiplier * nmi_scores) argmax_index = math_ops.to_int32( - math_ops.argmax(candidate_scores, dimension=0)) + math_ops.argmax(candidate_scores, axis=0)) return candidate_ids[argmax_index] @@ -811,7 +811,7 @@ def update_medoid_per_cluster(pairwise_distances, pairwise_distances_subset, candidate_scores = math_ops.add(scores_fac, margin_multiplier * scores_margin) argmax_index = math_ops.to_int32( - math_ops.argmax(candidate_scores, dimension=0)) + math_ops.argmax(candidate_scores, axis=0)) best_medoid = math_ops.to_int32(cluster_member_ids[argmax_index]) chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, best_medoid) diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index 48953e2e38..eff9081e35 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -27,7 +27,9 @@ if [ ! 
-f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once +# the archive has been propagated in mirror.bazel.build. +GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" diff --git a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py index 4090c1ff3e..f37a2593e2 100644 --- a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py +++ b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py @@ -348,7 +348,7 @@ def _freeze_graph_with_def_protos(input_graph_def, output_node_names, input_saver_def, input_checkpoint): """Converts all variables in a graph and checkpoint into constants. - During this process, we need to retain certain initialzer nodes (e.g. table + During this process, we need to retain certain initializer nodes (e.g. table initializer nodes). Instead of determining which dependencies of the shared initializer node (e.g. 
group_deps) to keep, we reconstruct the connections between the individual initializer nodes and diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 5364e3075d..00a933e5e0 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -2834,7 +2834,9 @@ def streaming_sparse_average_precision_at_top_k(top_k_predictions, name=name) -@deprecated(None, 'Please switch to tf.metrics.mean.') +@deprecated(None, + 'Please switch to tf.metrics.mean_absolute_error. Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_mean_absolute_error(predictions, labels, weights=None, @@ -2953,7 +2955,9 @@ def streaming_mean_relative_error(predictions, updates_collections=updates_collections, name=name) - +@deprecated(None, + 'Please switch to tf.metrics.mean_squared_error. Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_mean_squared_error(predictions, labels, weights=None, @@ -3011,7 +3015,10 @@ def streaming_mean_squared_error(predictions, updates_collections=updates_collections, name=name) - +@deprecated( + None, + 'Please switch to tf.metrics.root_mean_squared_error. 
Note that the ' + 'order of the labels and predictions arguments has been switched.') def streaming_root_mean_squared_error(predictions, labels, weights=None, @@ -3351,7 +3358,7 @@ def streaming_mean_cosine_distance(predictions, radial_diffs = math_ops.reduce_sum( radial_diffs, reduction_indices=[ dim, - ], keep_dims=True) + ], keepdims=True) mean_distance, update_op = streaming_mean(radial_diffs, weights, None, None, name or 'mean_cosine_distance') mean_distance = math_ops.subtract(1.0, mean_distance) diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py index 63fc487dca..e65925610c 100644 --- a/tensorflow/contrib/nn/python/ops/sampling_ops.py +++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py @@ -88,7 +88,7 @@ def _rank_resample(weights, biases, inputs, sampled_values, num_resampled, return math_ops.reduce_logsumexp( math_ops.matmul(embeddings, reweighted_inputs, transpose_b=True), axis=1, - keep_dims=False) + keepdims=False) # Calling this protected form of embedding_lookup allows co-locating # the logsumexp computation with the partitioned weights, which yields diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index c57c5e3f29..612ecc3e63 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -14,6 +14,7 @@ py_library( name = "opt_py", srcs = [ "__init__.py", + "python/training/adamax.py", "python/training/addsign.py", "python/training/drop_stale_gradient_optimizer.py", "python/training/elastic_average_optimizer.py", @@ -43,11 +44,27 @@ py_library( "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python:variables", + "//tensorflow/python/eager:context", "//third_party/py/numpy", "@six_archive//:six", ], ) +py_test( + name = "adamax_test", + srcs = ["python/training/adamax_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + 
"//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:training", + "//third_party/py/numpy", + ], +) + py_test( name = "external_optimizer_test", srcs = ["python/training/external_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 6c1bb1adc0..4c13c8e247 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=wildcard-import +from tensorflow.contrib.opt.python.training.adamax import * from tensorflow.contrib.opt.python.training.addsign import * from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import * from tensorflow.contrib.opt.python.training.external_optimizer import * @@ -36,6 +37,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ + 'AdaMaxOptimizer', 'PowerSignOptimizer', 'AddSignOptimizer', 'DelayCompensatedGradientDescentOptimizer', diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py new file mode 100644 index 0000000000..686bac0d84 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -0,0 +1,191 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""AdaMax for TensorFlow.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import adam +from tensorflow.python.training import training_ops + + +class AdaMaxOptimizer(adam.AdamOptimizer): + """Optimizer that implements the AdaMax algorithm. + + Adamax is sometimes superior to adam, especially in models with embeddings, + see [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) + ([pdf](http://arxiv.org/pdf/1412.6980.pdf)). + """ + + def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, + use_locking=False, name="AdaMax"): + """Construct a new AdaMax optimizer. + + Initialization: + + ``` + m_0 <- 0 (Initialize initial 1st moment vector) + v_0 <- 0 (Initialize the exponentially weighted infinity norm) + t <- 0 (Initialize timestep) + ``` + + The update rule for `variable` with gradient `g` uses an optimization + described at the end of section 7.1 of the paper: + + ``` + t <- t + 1 + + m_t <- beta1 * m_{t-1} + (1 - beta1) * g + v_t <- max(beta2 * v_{t-1}, abs(g)) + variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon) + ``` + + Similar to AdamOptimizer, the epsilon is added for numerical stability + (especially to get rid of division by zero when v_t = 0). 
+ + In contrast to AdamOptimizer, the sparse implementation of this algorithm + (used when the gradient is an IndexedSlices object, typically because of + `tf.gather` or an embedding lookup in the forward pass) only updates + variable slices and corresponding `m_t`, `v_t` terms when that part of + the variable was used in the forward pass. This means that the sparse + behavior is in contrast to the dense behavior (similar to some momentum + implementations which ignore momentum unless a variable slice was actually + used). + + Args: + learning_rate: A Tensor or a floating point value. The learning rate. + beta1: A float value or a constant float tensor. + The exponential decay rate for the 1st moment estimates. + beta2: A float value or a constant float tensor. + The exponential decay rate for the exponentially weighted infinity norm. + epsilon: A small constant for numerical stability. + use_locking: If True use locks for update operations. + name: Optional name for the operations created when applying gradients. + Defaults to "AdaMax". + """ + super(AdaMaxOptimizer, self).__init__(learning_rate, beta1, beta2, + epsilon, use_locking, name) + + def _get_beta_accumulators(self): + if context.executing_eagerly(): + graph = None + else: + graph = ops.get_default_graph() + return self._get_non_slot_variable("beta1_power", graph=graph) + + def _create_slots(self, var_list): + # Create the beta1 accumulators on the same device as the first + # variable. Sort the var_list to make sure this device is consistent across + # workers (these need to go on the same PS, otherwise some updates are + # silently ignored). + first_var = min(var_list, key=lambda x: x.name) + self._create_non_slot_variable(initial_value=self._beta1, + name="beta1_power", + colocate_with=first_var) + + # Create slots for the first and second moments. 
+ for v in var_list: + self._zeros_slot(v, "m", self._name) + self._zeros_slot(v, "v", self._name) + + def _apply_dense(self, grad, var): + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + beta1_power = self._get_beta_accumulators() + return training_ops.apply_ada_max( + var, m, v, + math_ops.cast(beta1_power, var.dtype.base_dtype), + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._beta1_t, var.dtype.base_dtype), + math_ops.cast(self._beta2_t, var.dtype.base_dtype), + math_ops.cast(self._epsilon_t, var.dtype.base_dtype), + grad, use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, var): + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + beta1_power = self._get_beta_accumulators() + return training_ops.resource_apply_ada_max( + var.handle, m.handle, v.handle, + math_ops.cast(beta1_power, grad.dtype.base_dtype), + math_ops.cast(self._lr_t, grad.dtype.base_dtype), + math_ops.cast(self._beta1_t, grad.dtype.base_dtype), + math_ops.cast(self._beta2_t, grad.dtype.base_dtype), + math_ops.cast(self._epsilon_t, grad.dtype.base_dtype), + grad, use_locking=self._use_locking) + + def _apply_sparse_shared(self, grad, var, indices, + scatter_add, scatter_update): + beta1_power = self._get_beta_accumulators() + beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype) + lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) + beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) + beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) + epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, "m") + m_slice = array_ops.gather(m, indices) + m_t_slice = m_slice * beta1_t + grad * (1 - beta1_t) + with ops.control_dependencies([m_t_slice]): + m_t = scatter_update(m, indices, m_t_slice) + # u_t = max(beta2 * u, abs(g_t)) + v = self.get_slot(var, "v") + v_slice = array_ops.gather(v, indices) + v_t_slice = math_ops.maximum(v_slice 
* beta2_t, math_ops.abs(grad)) + with ops.control_dependencies([v_t_slice]): + v_t = scatter_update(v, indices, v_t_slice) + # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t + var_slice = -lr_t / (1 - beta1_power) * (m_t_slice / + (v_t_slice + epsilon_t)) + with ops.control_dependencies([var_slice]): + var_update = scatter_add(var, indices, var_slice) + return control_flow_ops.group(*[var_update, m_t, v_t]) + + def _apply_sparse(self, grad, var): + return self._apply_sparse_shared( + grad.values, var, grad.indices, + lambda x, i, v: state_ops.scatter_add( # pylint: disable=g-long-lambda + x, i, v, use_locking=self._use_locking), + lambda x, i, v: state_ops.scatter_update( # pylint: disable=g-long-lambda + x, i, v, use_locking=self._use_locking)) + + def _resource_scatter_update(self, x, i, v): + with ops.control_dependencies( + [resource_variable_ops.resource_scatter_update( + x.handle, i, v)]): + return x.value() + + def _resource_apply_sparse(self, grad, var, indices): + return self._apply_sparse_shared( + grad, var, indices, + self._resource_scatter_add, self._resource_scatter_update) + + def _finish(self, update_ops, name_scope): + # Update the power accumulators. + with ops.control_dependencies(update_ops): + beta1_power = self._get_beta_accumulators() + with ops.colocate_with(beta1_power): + update_beta1 = beta1_power.assign( + beta1_power * self._beta1_t, use_locking=self._use_locking) + return control_flow_ops.group(*update_ops + [update_beta1], + name=name_scope) diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py new file mode 100644 index 0000000000..bc92a7006f --- /dev/null +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -0,0 +1,348 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for AdaMax.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.opt.python.training import adamax +from tensorflow.python.client import session +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def adamax_update_numpy(param, + g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): + m_t = beta1 * m + (1 - beta1) * g_t + v_t = np.maximum(beta2 * v, np.abs(g_t)) + param_t = param - (alpha / (1 - beta1**t)) * (m_t / (v_t + epsilon)) + return param_t, m_t, v_t + + +def adamax_sparse_update_numpy(param, + indices, + g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): + m_t, v_t, param_t = np.copy(m), np.copy(v), np.copy(param) + m_t_slice = beta1 * m[indices] + (1 - beta1) * g_t + v_t_slice = np.maximum(beta2 * v[indices], np.abs(g_t)) + param_t_slice = param[indices] - ((alpha / (1 - beta1**t)) * + (m_t_slice / (v_t_slice + epsilon))) + m_t[indices] = m_t_slice + v_t[indices] = 
v_t_slice + param_t[indices] = param_t_slice + return param_t, m_t, v_t + + +class AdaMaxOptimizerTest(test.TestCase): + + def doTestSparse(self, use_resource=False): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. + zero_slots = lambda: np.zeros((3), dtype=dtype.as_numpy_dtype) + m0, v0, m1, v1 = zero_slots(), zero_slots(), zero_slots(), zero_slots() + var0_np = np.array([1.0, 2.0, 3.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([4.0, 5.0, 6.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), constant_op.constant([2])) + grads1_np_indices = np.array([2, 1], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), constant_op.constant([2])) + opt = adamax.AdaMaxOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0, 3.0], var0.eval()) + self.assertAllClose([4.0, 5.0, 6.0], var1.eval()) + + beta1_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + update.run() + + var0_np, m0, v0 = adamax_sparse_update_numpy( + var0_np, grads0_np_indices, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_sparse_update_numpy( + var1_np, grads1_np_indices, grads1_np, t, m1, v1) + + # Validate 
updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSparse(self): + self.doTestSparse(use_resource=False) + + def testResourceSparse(self): + self.doTestSparse(use_resource=True) + + def testSparseDevicePlacement(self): + for index_dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(force_gpu=test.is_gpu_available()): + # If a GPU is available, tests that all optimizer ops can be placed on + # it (i.e. they have GPU kernels). + var = variables.Variable([[1.0], [2.0]]) + indices = constant_op.constant([0, 1], dtype=index_dtype) + gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices)) + optimizer = adamax.AdaMaxOptimizer(3.0) + minimize_op = optimizer.minimize(gathered_sum) + variables.global_variables_initializer().run() + minimize_op.run() + + def testSparseRepeatedIndices(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + repeated_index_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + aggregated_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + grad_repeated_index = ops.IndexedSlices( + constant_op.constant( + [0.1, 0.1], shape=[2, 1], dtype=dtype), + constant_op.constant([1, 1]), + constant_op.constant([2, 1])) + grad_aggregated = ops.IndexedSlices( + constant_op.constant( + [0.2], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), + constant_op.constant([2, 1])) + repeated_update = adamax.AdaMaxOptimizer().apply_gradients( + [(grad_repeated_index, repeated_index_update_var)]) + aggregated_update = adamax.AdaMaxOptimizer().apply_gradients( + [(grad_aggregated, aggregated_update_var)]) + variables.global_variables_initializer().run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose(aggregated_update_var.eval(), + 
repeated_index_update_var.eval()) + + def doTestBasic(self, use_resource=False): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + with self.test_session(graph=ops.Graph()): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable( + var0_np, name="var0_%d" % i) + var1 = resource_variable_ops.ResourceVariable( + var1_np, name="var1_%d" % i) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + opt = adamax.AdaMaxOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + opt_variables = opt.variables() + beta1_power = opt._get_beta_accumulators() + self.assertTrue(beta1_power is not None) + self.assertIn(beta1_power, opt_variables) + + with ops.Graph().as_default(): + # Shouldn't return non-slot variables from other graphs. 
+ self.assertEqual(0, len(opt.variables())) + + if not context.executing_eagerly(): + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + beta1_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + if not context.executing_eagerly(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + self.assertAllCloseAccordingToType(0.9**(t + 1), + self.evaluate(beta1_power)) + + var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + if use_resource: + self.assertEqual("var0_%d/AdaMax:0" % (i,), + opt.get_slot(var=var0, name="m").name) + + def testBasic(self): + with self.test_session(): + self.doTestBasic(use_resource=False) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testResourceBasic(self): + self.doTestBasic(use_resource=True) + + def testTensorLearningRate(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = adamax.AdaMaxOptimizer(constant_op.constant(0.001)) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + beta1_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + update.run() + + var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSharing(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = adamax.AdaMaxOptimizer() + update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + beta1_power = opt._get_beta_accumulators() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 3 steps of intertwined AdaMax1 and AdaMax2. + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + if t % 2 == 0: + update1.run() + else: + update2.run() + + var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testTwoSessions(self): + optimizer = adamax.AdaMaxOptimizer() + g = ops.Graph() + with g.as_default(): + with session.Session(): + var0 = variables.Variable(np.array([1.0, 2.0]), name="v0") + grads0 = constant_op.constant(np.array([0.1, 0.1])) + optimizer.apply_gradients([(grads0, var0)]) + + gg = ops.Graph() + with gg.as_default(): + with session.Session(): + var0 = variables.Variable(np.array([1.0, 2.0]), name="v0") + grads0 = constant_op.constant(np.array([0.1, 0.1])) + + # If the optimizer saves any state not keyed by graph the following line + # fails. 
+ optimizer.apply_gradients([(grads0, var0)]) + + def testSlotsUniqueEager(self): + with context.eager_mode(): + v1 = resource_variable_ops.ResourceVariable(1.) + v2 = resource_variable_ops.ResourceVariable(1.) + opt = adamax.AdaMaxOptimizer(1.) + opt.minimize(lambda: v1 + v2) + # There should be one non-slot variable, and two unique slot variables + # for v1 and v2 respectively. + self.assertEqual(5, len(set(opt.variables()))) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py index 85e3e8d379..ac04ad9911 100644 --- a/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py @@ -85,7 +85,7 @@ class MovingAverageOptimizerTest(test.TestCase): state_ops.assign_add(ema_var1, [4.0, 4.0]) ]) - # Test taht saver with missing ema variables will fail. + # Test that saver with missing ema variables will fail. with self.assertRaisesRegexp(ValueError, r'Variable to swap'): opt.swapping_saver(var_list=[var0]) @@ -123,7 +123,7 @@ class MovingAverageOptimizerTest(test.TestCase): self.assertAllCloseAccordingToType([0.9, 1.9], ema_var0.eval()) self.assertAllCloseAccordingToType([4.98, 5.98], var1.eval()) self.assertAllCloseAccordingToType([6.99, 7.99], ema_var1.eval()) - # Restore back to previou state. + # Restore back to previous state. train_saver.restore(sess, save_path) # If updates are parallel, this is not always true after the 1st step. 
diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py index 6ade4ccd52..8ac9b58145 100644 --- a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py +++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py @@ -456,7 +456,7 @@ class CheckpointingTests(test.TestCase): optimizer.apply_gradients( [(g, v) for g, v in zip(grad, model.vars)]) - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + @test_util.run_in_graph_and_eager_modes() def testDeferredSlotRestoration(self): checkpoint_directory = self.get_temp_dir() diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py index dcb5bb6416..46bfbb729f 100644 --- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py +++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py @@ -564,7 +564,7 @@ class OptimizerV2(optimizer_v1.Optimizer): ### State - Internal methods apre passed a `state` argument with the correct + Internal methods are passed a `state` argument with the correct values to use for the slot and non-slot variables, and the hyper parameters. 
""" diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 4a8f8a04cc..aa0ef64308 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -545,7 +545,7 @@ def _GetBatchNormParams(graph, context, has_scaling): gamma_tensor = graph.get_tensor_by_name(op.name + ':0') if not has_scaling: - gamma_tensor = array_ops.ones(batch_mean_tensor.shape) + gamma_tensor = array_ops.ones(moving_mean_tensor.shape) return _BatchNormMatch( layer_op=None, diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index 0232103c41..cd162bae25 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -30,6 +30,7 @@ from tensorflow.contrib.seq2seq.python.ops import helper as helper_py from tensorflow.contrib.seq2seq.python.ops import basic_decoder from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.layers import core as layers_core from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops @@ -110,7 +111,12 @@ class AttentionWrapperTest(test.TestCase): alignment_history=False, expected_final_alignment_history=None, attention_layer_size=6, + attention_layer=None, name=''): + attention_layer_sizes = ( + [attention_layer_size] if attention_layer_size is not None else None) + attention_layers = ( + [attention_layer] if attention_layer is not None else None) self._testWithMaybeMultiAttention( is_multi=False, create_attention_mechanisms=[create_attention_mechanism], @@ -119,7 +125,8 @@ class AttentionWrapperTest(test.TestCase): attention_mechanism_depths=[attention_mechanism_depth], 
alignment_history=alignment_history, expected_final_alignment_history=expected_final_alignment_history, - attention_layer_sizes=[attention_layer_size], + attention_layer_sizes=attention_layer_sizes, + attention_layers=attention_layers, name=name) def _testWithMaybeMultiAttention(self, @@ -131,6 +138,7 @@ class AttentionWrapperTest(test.TestCase): alignment_history=False, expected_final_alignment_history=None, attention_layer_sizes=None, + attention_layers=None, name=''): # Allow is_multi to be True with a single mechanism to enable test for # passing in a single mechanism in a list. @@ -144,12 +152,18 @@ class AttentionWrapperTest(test.TestCase): encoder_output_depth = 10 cell_depth = 9 - if attention_layer_sizes is None: - attention_depth = encoder_output_depth * len(create_attention_mechanisms) - else: + if attention_layer_sizes is not None: # Compute sum of attention_layer_sizes. Use encoder_output_depth if None. attention_depth = sum([attention_layer_size or encoder_output_depth for attention_layer_size in attention_layer_sizes]) + elif attention_layers is not None: + # Compute sum of attention_layers output depth. 
+ attention_depth = sum( + attention_layer.compute_output_shape( + [batch_size, cell_depth + encoder_output_depth])[-1].value + for attention_layer in attention_layers) + else: + attention_depth = encoder_output_depth * len(create_attention_mechanisms) decoder_inputs = array_ops.placeholder_with_default( np.random.randn(batch_size, decoder_max_time, @@ -171,13 +185,20 @@ class AttentionWrapperTest(test.TestCase): with vs.variable_scope( 'root', initializer=init_ops.random_normal_initializer(stddev=0.01, seed=3)): + attention_layer_size = attention_layer_sizes + attention_layer = attention_layers + if not is_multi: + if attention_layer_size is not None: + attention_layer_size = attention_layer_size[0] + if attention_layer is not None: + attention_layer = attention_layer[0] cell = rnn_cell.LSTMCell(cell_depth) cell = wrapper.AttentionWrapper( cell, attention_mechanisms if is_multi else attention_mechanisms[0], - attention_layer_size=(attention_layer_sizes if is_multi - else attention_layer_sizes[0]), - alignment_history=alignment_history) + attention_layer_size=attention_layer_size, + alignment_history=alignment_history, + attention_layer=attention_layer) helper = helper_py.TrainingHelper(decoder_inputs, decoder_sequence_length) my_decoder = basic_decoder.BasicDecoder( @@ -260,6 +281,41 @@ class AttentionWrapperTest(test.TestCase): expected_final_alignment_history, final_alignment_history_info) + def testBahdanauNormalizedDType(self): + for dtype in [np.float16, np.float32, np.float64]: + num_units = 128 + encoder_outputs = array_ops.placeholder(dtype, shape=[64, None, 256]) + encoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64]) + decoder_inputs = array_ops.placeholder(dtype, shape=[64, None, 128]) + decoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64]) + batch_size = 64 + attention_mechanism = wrapper.BahdanauAttention( + num_units=num_units, + memory=encoder_outputs, + memory_sequence_length=encoder_sequence_length, + 
normalize=True, + dtype=dtype, + ) + cell = rnn_cell.LSTMCell(num_units) + cell = wrapper.AttentionWrapper(cell, attention_mechanism) + + helper = helper_py.TrainingHelper(decoder_inputs, + decoder_sequence_length) + my_decoder = basic_decoder.BasicDecoder( + cell=cell, + helper=helper, + initial_state=cell.zero_state( + dtype=dtype, batch_size=batch_size)) + + final_outputs, final_state, _ = decoder.dynamic_decode(my_decoder) + self.assertTrue( + isinstance(final_outputs, basic_decoder.BasicDecoderOutput)) + self.assertEqual(final_outputs.rnn_output.dtype, dtype) + self.assertTrue( + isinstance(final_state, wrapper.AttentionWrapperState)) + self.assertTrue( + isinstance(final_state.cell_state, rnn_cell.LSTMStateTuple)) + def testBahdanauNotNormalized(self): create_attention_mechanism = wrapper.BahdanauAttention @@ -797,6 +853,48 @@ class AttentionWrapperTest(test.TestCase): expected_final_alignment_history=expected_final_alignment_history, name='testMultiAttention') + def testMultiAttentionWithLayerInstances(self): + create_attention_mechanisms = ( + wrapper.BahdanauAttention, wrapper.LuongAttention) + + expected_final_output = BasicDecoderOutput( + rnn_output=ResultSummary( + shape=(5, 3, 7), dtype=dtype('float32'), mean=0.0011709079), + sample_id=ResultSummary( + shape=(5, 3), dtype=dtype('int32'), mean=3.2000000000000002)) + expected_final_state = AttentionWrapperState( + cell_state=LSTMStateTuple( + c=ResultSummary( + shape=(5, 9), dtype=dtype('float32'), mean=-0.0038725811), + h=ResultSummary( + shape=(5, 9), dtype=dtype('float32'), mean=-0.0019329828)), + attention=ResultSummary( + shape=(5, 7), dtype=dtype('float32'), mean=0.001174294), + time=3, + alignments=( + ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125), + ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)), + attention_state=( + ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125), + ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)), + 
alignment_history=()) + + expected_final_alignment_history = ( + ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125), + ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125)) + + self._testWithMaybeMultiAttention( + True, + create_attention_mechanisms, + expected_final_output, + expected_final_state, + attention_mechanism_depths=[9, 9], + attention_layers=[layers_core.Dense(3, use_bias=False), + layers_core.Dense(4, use_bias=False)], + alignment_history=True, + expected_final_alignment_history=expected_final_alignment_history, + name='testMultiAttention') + def testLuongMonotonicHard(self): # Run attention mechanism with mode='hard', make sure probabilities are hard b, t, u, d = 10, 20, 30, 40 diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 8a40a7ab53..1c9d179e3c 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -472,7 +472,8 @@ def _bahdanau_score(processed_query, keys, normalize): # Scalar used in weight normalization g = variable_scope.get_variable( "attention_g", dtype=dtype, - initializer=math.sqrt((1. / num_units))) + initializer=init_ops.constant_initializer(math.sqrt((1. / num_units))), + shape=()) # Bias added prior to the nonlinearity b = variable_scope.get_variable( "attention_b", [num_units], dtype=dtype, @@ -1082,7 +1083,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): cell_input_fn=None, output_attention=True, initial_cell_state=None, - name=None): + name=None, + attention_layer=None): """Construct the `AttentionWrapper`. **NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped in @@ -1125,7 +1127,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): (default), use the context as attention at each time step. Otherwise, feed the context and cell output into the attention layer to generate attention at each time step. 
If attention_mechanism is a list, - attention_layer_size must be a list of the same length. + attention_layer_size must be a list of the same length. If + attention_layer is set, this must be None. alignment_history: Python boolean, whether to store alignment history from all time steps in the final output state (currently stored as a time major `TensorArray` on which you must call `stack()`). @@ -1145,12 +1148,19 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): does not match the batch size of `initial_cell_state`, proper behavior is not guaranteed. name: Name to use when creating ops. + attention_layer: A list of `tf.layers.Layer` instances or a + single `tf.layers.Layer` instance taking the context and cell output as + inputs to generate attention at each time step. If None (default), use + the context as attention at each time step. If attention_mechanism is a + list, attention_layer must be a list of the same length. If + attention_layer_size is set, this must be None. Raises: TypeError: `attention_layer_size` is not None and (`attention_mechanism` is a list but `attention_layer_size` is not; or vice versa). ValueError: if `attention_layer_size` is not None, `attention_mechanism` - is a list, and its length does not match that of `attention_layer_size`. + is a list, and its length does not match that of `attention_layer_size`; + if `attention_layer_size` and `attention_layer` are set simultaneously.
""" super(AttentionWrapper, self).__init__(name=name) rnn_cell_impl.assert_like_rnncell("cell", cell) @@ -1181,6 +1191,10 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): "cell_input_fn must be callable, saw type: %s" % type(cell_input_fn).__name__) + if attention_layer_size is not None and attention_layer is not None: + raise ValueError("Only one of attention_layer_size and attention_layer " + "should be set") + if attention_layer_size is not None: attention_layer_sizes = tuple( attention_layer_size @@ -1199,6 +1213,22 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): dtype=attention_mechanisms[i].dtype) for i, attention_layer_size in enumerate(attention_layer_sizes)) self._attention_layer_size = sum(attention_layer_sizes) + elif attention_layer is not None: + self._attention_layers = tuple( + attention_layer + if isinstance(attention_layer, (list, tuple)) + else (attention_layer,)) + if len(self._attention_layers) != len(attention_mechanisms): + raise ValueError( + "If provided, attention_layer must contain exactly one " + "layer per attention_mechanism, saw: %d vs %d" + % (len(self._attention_layers), len(attention_mechanisms))) + self._attention_layer_size = sum( + layer.compute_output_shape( + [None, + cell.output_size + mechanism.values.shape[-1].value])[-1].value + for layer, mechanism in zip( + self._attention_layers, attention_mechanisms)) else: self._attention_layers = None self._attention_layer_size = sum( diff --git a/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py index 35c4b5bec1..345eb6cfaa 100644 --- a/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py +++ b/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py @@ -24,6 +24,7 @@ from tensorflow.contrib.signal.python.kernel_tests import test_util from tensorflow.contrib.signal.python.ops import mel_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from 
tensorflow.python.ops import array_ops from tensorflow.python.platform import test # mel spectrum constants and functions. @@ -173,6 +174,18 @@ class LinearToMelTest(test.TestCase): rewritten_graph = test_util.grappler_optimize(g, [mel_matrix]) self.assertEqual(1, len(rewritten_graph.node)) + def test_num_spectrogram_bins_dynamic(self): + with self.test_session(use_gpu=True): + num_spectrogram_bins = array_ops.placeholder(shape=(), + dtype=dtypes.int32) + mel_matrix_np = spectrogram_to_mel_matrix( + 20, 129, 8000.0, 125.0, 3800.0) + mel_matrix = mel_ops.linear_to_mel_weight_matrix( + 20, num_spectrogram_bins, 8000.0, 125.0, 3800.0) + self.assertAllClose( + mel_matrix_np, + mel_matrix.eval(feed_dict={num_spectrogram_bins: 129}), atol=3e-6) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/signal/python/ops/mel_ops.py b/tensorflow/contrib/signal/python/ops/mel_ops.py index d1a36548d9..1e84006116 100644 --- a/tensorflow/contrib/signal/python/ops/mel_ops.py +++ b/tensorflow/contrib/signal/python/ops/mel_ops.py @@ -64,14 +64,11 @@ def _hertz_to_mel(frequencies_hertz, name=None): 1.0 + (frequencies_hertz / _MEL_BREAK_FREQUENCY_HERTZ)) -def _validate_arguments(num_mel_bins, num_spectrogram_bins, sample_rate, +def _validate_arguments(num_mel_bins, sample_rate, lower_edge_hertz, upper_edge_hertz, dtype): """Checks the inputs to linear_to_mel_weight_matrix.""" if num_mel_bins <= 0: raise ValueError('num_mel_bins must be positive. Got: %s' % num_mel_bins) - if num_spectrogram_bins <= 0: - raise ValueError('num_spectrogram_bins must be positive. Got: %s' % - num_spectrogram_bins) if sample_rate <= 0.0: raise ValueError('sample_rate must be positive. Got: %s' % sample_rate) if lower_edge_hertz < 0.0: @@ -122,9 +119,9 @@ def linear_to_mel_weight_matrix(num_mel_bins=20, Args: num_mel_bins: Python int. How many bands in the resulting mel spectrum. - num_spectrogram_bins: Python int. 
How many bins there are in the source - spectrogram data, which is understood to be `fft_size // 2 + 1`, i.e. the - spectrogram only contains the nonredundant FFT bins. + num_spectrogram_bins: An integer `Tensor`. How many bins there are in the + source spectrogram data, which is understood to be `fft_size // 2 + 1`, + i.e. the spectrogram only contains the nonredundant FFT bins. sample_rate: Python float. Samples per second of the input signal used to create the spectrogram. We need this to figure out the actual frequencies for each spectrogram bin, which dictates how they are mapped into the mel @@ -148,7 +145,10 @@ def linear_to_mel_weight_matrix(num_mel_bins=20, [mel]: https://en.wikipedia.org/wiki/Mel_scale """ with ops.name_scope(name, 'linear_to_mel_weight_matrix') as name: - _validate_arguments(num_mel_bins, num_spectrogram_bins, sample_rate, + # Note: As num_spectrogram_bins is passed to `math_ops.linspace` + # and the validation is already done in linspace (both in shape function + # and in kernel), there is no need to validate num_spectrogram_bins here. 
+ _validate_arguments(num_mel_bins, sample_rate, lower_edge_hertz, upper_edge_hertz, dtype) # To preserve accuracy, we compute the matrix at float64 precision and then diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md index 40f484fd78..746b955642 100644 --- a/tensorflow/contrib/slim/README.md +++ b/tensorflow/contrib/slim/README.md @@ -290,9 +290,9 @@ slim.stack(x, slim.conv2d, [(32, [3, 3]), (32, [1, 1]), (64, [3, 3]), (64, [1, 1 In addition to the types of scope mechanisms in TensorFlow ([name_scope](https://www.tensorflow.org/api_docs/python/tf/name_scope), -[variable_scope](https://www.tensorflow.org/api_docs/python/tf/variable_scope), +[variable_scope](https://www.tensorflow.org/api_docs/python/tf/variable_scope)), TF-Slim adds a new scoping mechanism called -[arg_scope](https://www.tensorflow.org/api_docs/python/tf/contrib/framework/arg_scope), +[arg_scope](https://www.tensorflow.org/api_docs/python/tf/contrib/framework/arg_scope). This new scope allows a user to specify one or more operations and a set of arguments which will be passed to each of the operations defined in the `arg_scope`. This functionality is best illustrated by example. Consider the @@ -761,8 +761,8 @@ parts: 3. Finalization: (optionally) perform any final operation to compute metric values. For example, computing means, mins, maxes, etc. -For example, to compute `mean_absolute_error`, two variables, a `count` and -`total` variable are *initialized* to zero. During *aggregation*, we observed +For example, to compute `mean_absolute_error`, two variables (`count` and +`total`) are *initialized* to zero. During *aggregation*, we observed some set of predictions and labels, compute their absolute differences and add the total to `total`. Each time we observe another value, `count` is incremented. 
Finally, during *finalization*, `total` is divided diff --git a/tensorflow/contrib/slim/python/slim/learning.py b/tensorflow/contrib/slim/python/slim/learning.py index 6a200de1ea..8a2c74742a 100644 --- a/tensorflow/contrib/slim/python/slim/learning.py +++ b/tensorflow/contrib/slim/python/slim/learning.py @@ -389,7 +389,7 @@ def create_train_op(total_loss, total_loss: A `Tensor` representing the total loss. optimizer: A tf.Optimizer to use for computing the gradients. global_step: A `Tensor` representing the global step variable. If left as - `_USE_GLOBAL_STEP`, then slim.variables.global_step() is used. + `_USE_GLOBAL_STEP`, then tf.contrib.framework.global_step() is used. update_ops: An optional list of updates to execute. If `update_ops` is `None`, then the update ops are set to the contents of the `tf.GraphKeys.UPDATE_OPS` collection. If `update_ops` is not `None`, but @@ -578,7 +578,8 @@ def train(train_op, is_chief: Specifies whether or not the training is being run by the primary replica during replica training. global_step: The `Tensor` representing the global step. If left as `None`, - then slim.variables.get_or_create_global_step() is used. + then training_util.get_or_create_global_step(), that is, + tf.contrib.framework.global_step() is used. number_of_steps: The max number of gradient steps to take during training, as measured by 'global_step': training will stop if global_step is greater than 'number_of_steps'. If the value is left as None, training diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v1.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v1.py index 235a595de4..11c4214176 100644 --- a/tensorflow/contrib/slim/python/slim/nets/resnet_v1.py +++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v1.py @@ -207,7 +207,7 @@ def resnet_v1(inputs, net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) if global_pool: # Global average pooling. 
- net = math_ops.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) + net = math_ops.reduce_mean(net, [1, 2], name='pool5', keepdims=True) if num_classes is not None: net = layers.conv2d( net, diff --git a/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py b/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py index 61665c9c8b..19e0538dd1 100644 --- a/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py +++ b/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py @@ -221,7 +221,7 @@ def resnet_v2(inputs, net, activation_fn=nn_ops.relu, scope='postnorm') if global_pool: # Global average pooling. - net = math_ops.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) + net = math_ops.reduce_mean(net, [1, 2], name='pool5', keepdims=True) if num_classes is not None: net = layers_lib.conv2d( net, diff --git a/tensorflow/contrib/tensor_forest/client/random_forest.py b/tensorflow/contrib/tensor_forest/client/random_forest.py index 4abcc20ed3..35e8c92aba 100644 --- a/tensorflow/contrib/tensor_forest/client/random_forest.py +++ b/tensorflow/contrib/tensor_forest/client/random_forest.py @@ -399,7 +399,7 @@ def get_combined_model_fn(model_fns): training ops: tf.group them. loss: average them. predictions: concat probabilities such that predictions[*][0-C1] are the - probablities for output 1 (where C1 is the number of classes in output 1), + probabilities for output 1 (where C1 is the number of classes in output 1), predictions[*][C1-(C1+C2)] are the probabilities for output 2 (where C2 is the number of classes in output 2), etc. Also stack predictions such that predictions[i][j] is the class prediction for example i and output j. 
diff --git a/tensorflow/contrib/tensor_forest/hybrid/core/ops/hard_routing_function_op.cc b/tensorflow/contrib/tensor_forest/hybrid/core/ops/hard_routing_function_op.cc index cf0db788a4..06bfe871fd 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/core/ops/hard_routing_function_op.cc +++ b/tensorflow/contrib/tensor_forest/hybrid/core/ops/hard_routing_function_op.cc @@ -80,7 +80,7 @@ REGISTER_OP("HardRoutingFunction") regression model that translates from node features to probabilities. - path_probility: `path_probability[i]` gives the probability of reaching each + path_probability: `path_probability[i]` gives the probability of reaching each node in `path[i]`. path: `path[i][j]` gives the jth node in the path taken by the ith data instance. diff --git a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc index c9df09bfda..1a055756c0 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc +++ b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_function_op.cc @@ -85,7 +85,7 @@ REGISTER_OP("StochasticHardRoutingFunction") regression model that translates from node features to probabilities. - path_probility: `path_probability[i]` gives the probability of reaching each + path_probability: `path_probability[i]` gives the probability of reaching each node in `path[i]`. path: `path[i][j]` gives the jth node in the path taken by the ith data instance. 
diff --git a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_gradient_op.cc b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_gradient_op.cc index b0d8b832b5..7d092bbc24 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_gradient_op.cc +++ b/tensorflow/contrib/tensor_forest/hybrid/core/ops/stochastic_hard_routing_gradient_op.cc @@ -81,7 +81,7 @@ REGISTER_OP("StochasticHardRoutingGradient") tree_biases: `tree_biases[i]` gives the bias of the logistic regression model that translates from node features to probabilities. - path_probility: `path_probability[i]` gives the probability of reaching each + path_probability: `path_probability[i]` gives the probability of reaching each node in `path[i]`. path: `path[i][j]` gives the jth node in the path taken by the ith data instance. diff --git a/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc b/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc index 44997ec5d6..cefcc96051 100644 --- a/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc +++ b/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc @@ -421,7 +421,7 @@ double getChebyshevEpsilon(const std::vector& mu1, const std::vector& mu2) { // Math time!! // We are trying to minimize d = |mu1 - x|^2 + |mu2 - y|^2 over the surface. - // Using Langrange multipliers, we get + // Using Lagrange multipliers, we get // partial d / partial x = -2 mu1 + 2 x = lambda_1 1 + 2 lambda_3 x // partial d / partial y = -2 mu2 + 2 y = lambda_2 1 - 2 lambda_3 y // or @@ -485,7 +485,7 @@ double getChebyshevEpsilon(const std::vector& mu1, } double sdiscrim = sqrt(discrim); - // TODO(thomaswc): Analyze whetever one of these is always closer. + // TODO(thomaswc): Analyze whether one of these is always closer.
double v1 = (-b + sdiscrim) / (2 * a); double v2 = (-b - sdiscrim) / (2 * a); double dist1 = getDistanceFromLambda3(v1, mu1, mu2); diff --git a/tensorflow/contrib/tensor_forest/kernels/tree_utils.h b/tensorflow/contrib/tensor_forest/kernels/tree_utils.h index edbac67006..03aab1b61e 100644 --- a/tensorflow/contrib/tensor_forest/kernels/tree_utils.h +++ b/tensorflow/contrib/tensor_forest/kernels/tree_utils.h @@ -123,7 +123,7 @@ bool BestSplitDominatesRegression(const Tensor& total_sums, const Tensor& split_squares, int32 accumulator); -// Performs booststrap_samples bootstrap samples of the best split's class +// Performs bootstrap_samples bootstrap samples of the best split's class // counts and the second best splits's class counts, and returns true if at // least dominate_fraction of the time, the former has a better (lower) // Gini impurity. Does not take over ownership of *rand. diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/decision-tree-resource.h b/tensorflow/contrib/tensor_forest/kernels/v4/decision-tree-resource.h index 328af28725..d3edb43733 100644 --- a/tensorflow/contrib/tensor_forest/kernels/v4/decision-tree-resource.h +++ b/tensorflow/contrib/tensor_forest/kernels/v4/decision-tree-resource.h @@ -60,7 +60,7 @@ class DecisionTreeResource : public ResourceBase { mutex* get_mutex() { return &mu_; } // Return the TreeNode for the leaf that the example ends up at according - // to decsion_tree_. Also fill in that leaf's depth if it isn't nullptr. + // to decision_tree_. Also fill in that leaf's depth if it isn't nullptr. 
int32 TraverseTree(const std::unique_ptr& input_data, int example, int32* depth, TreePath* path) const; diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/decision_node_evaluator.h b/tensorflow/contrib/tensor_forest/kernels/v4/decision_node_evaluator.h index bf2b2aaa3c..3db351c328 100644 --- a/tensorflow/contrib/tensor_forest/kernels/v4/decision_node_evaluator.h +++ b/tensorflow/contrib/tensor_forest/kernels/v4/decision_node_evaluator.h @@ -60,7 +60,7 @@ class InequalityDecisionNodeEvaluator : public BinaryDecisionNodeEvaluator { bool include_equals_; }; -// Evalutor for splits with multiple weighted features. +// Evaluator for splits with multiple weighted features. class ObliqueInequalityDecisionNodeEvaluator : public BinaryDecisionNodeEvaluator { public: diff --git a/tensorflow/contrib/tensor_forest/ops/model_ops.cc b/tensorflow/contrib/tensor_forest/ops/model_ops.cc index 3099cccdf8..98124d519c 100644 --- a/tensorflow/contrib/tensor_forest/ops/model_ops.cc +++ b/tensorflow/contrib/tensor_forest/ops/model_ops.cc @@ -165,7 +165,7 @@ tree_handle: The handle to the tree. leaf_ids: `leaf_ids[i]` is the leaf id for input i. input_labels: The training batch's labels as a 1 or 2-d tensor. 'input_labels[i][j]' gives the j-th label/target for the i-th input. -input_weights: The training batch's eample weights as a 1-d tensor. +input_weights: The training batch's weights as a 1-d tensor. 'input_weights[i]' gives the weight for the i-th input. 
)doc"); diff --git a/tensorflow/contrib/tensor_forest/ops/stats_ops.cc b/tensorflow/contrib/tensor_forest/ops/stats_ops.cc index e8b5c5d8a6..5be581aaec 100644 --- a/tensorflow/contrib/tensor_forest/ops/stats_ops.cc +++ b/tensorflow/contrib/tensor_forest/ops/stats_ops.cc @@ -75,7 +75,7 @@ REGISTER_OP("GrowTreeV4") .Attr("params: string") .Input("tree_handle: resource") .Input("stats_handle: resource") - .Input("finshed_nodes: int32") + .Input("finished_nodes: int32") .SetShapeFn(tensorflow::shape_inference::NoOutputs) .Doc(R"doc( Grows the tree for finished nodes and allocates waiting nodes. @@ -83,7 +83,7 @@ Grows the tree for finished nodes and allocates waiting nodes. params: A serialized TensorForestParams proto. tree_handle: The handle to the tree. stats_handle: The handle to the stats. -finshed_nodes: A 1-d Tensor of finished node ids from ProcessInput. +finished_nodes: A 1-d Tensor of finished node ids from ProcessInput. )doc"); REGISTER_OP("ProcessInputV4") @@ -119,7 +119,7 @@ sparse_input_values: The values tensor from the SparseTensor input. sparse_input_shape: The shape tensor from the SparseTensor input. input_labels: The training batch's labels as a 1 or 2-d tensor. 'input_labels[i][j]' gives the j-th label/target for the i-th input. -input_weights: The training batch's eample weights as a 1-d tensor. +input_weights: The training batch's weights as a 1-d tensor. 'input_weights[i]' gives the weight for the i-th input. finished_nodes: A 1-d tensor of node ids that have finished and are ready to grow. 
diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py index 3650b5d52f..b9bcbb170b 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py @@ -212,7 +212,7 @@ class ForestHParams(object): self.regression = getattr(self, 'regression', False) # Num_outputs is the actual number of outputs (a single prediction for - # classification, a N-dimenensional point for regression). + # classification, a N-dimensional point for regression). self.num_outputs = self.num_classes if self.regression else 1 # Add an extra column to classes for storing counts, which is needed for diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 2f316767b3..f80b4f1b11 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -11,6 +11,7 @@ exports_files(["LICENSE"]) load( "//tensorflow:tensorflow.bzl", + "py_test", "tf_cc_test", "tf_copts", "tf_cuda_library", @@ -52,7 +53,6 @@ tf_custom_op_library( "ops/trt_engine_op.cc", ], deps = [ - ":trt_engine_op_kernel", ":trt_shape_function", "//tensorflow/core:lib_proto_parsing", ] + if_tensorrt([ @@ -140,6 +140,7 @@ tf_custom_op_py_library( ]), srcs_version = "PY2AND3", deps = [ + "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:resources", ], @@ -174,6 +175,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":wrap_conversion", + "//tensorflow/python:tf_optimizer", ], ) @@ -183,6 +185,7 @@ tf_py_wrap_cc( copts = tf_copts(), deps = [ ":trt_conversion", + ":trt_engine_op_kernel", "//tensorflow/core:framework_lite", "//util/python:python_headers", ], @@ -272,3 +275,19 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) + +py_test( + name = "tf_trt_integration_test", + srcs = ["test/tf_trt_integration_test.py"], + main = "test/tf_trt_integration_test.py", + srcs_version = 
"PY2AND3", + tags = [ + "manual", + "notap", + ], + deps = [ + ":init_py", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_test_lib", + ], +) diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index 6eafc1754c..687dee07e1 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -1,59 +1,29 @@ # Using TensorRT in TensorFlow - -This module provides necessary bindings and introduces TRT_engine_op -operator that wraps a subgraph in TensorRT. This is still a work in progress -but should be useable with most common graphs. +This module provides necessary bindings and introduces TRT_engine_op operator +that wraps a subgraph in TensorRT. This is still a work in progress but should +be useable with most common graphs. ## Compilation - -In order to compile the module, you need to have a local TensorRT -installation ( libnvinfer.so and respective include files ). During the -configuration step, TensorRT should be enabled and installation path -should be set. If installed through package managers (deb,rpm), -configure script should find the necessary components from the system -automatically. If installed from tar packages, user has to set path to -location where the library is installed during configuration. +In order to compile the module, you need to have a local TensorRT installation +(libnvinfer.so and respective include files). During the configuration step, +TensorRT should be enabled and installation path should be set. If installed +through package managers (deb,rpm), configure script should find the necessary +components from the system automatically. If installed from tar packages, user +has to set path to location where the library is installed during configuration. 
```shell bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` -After the installation of tensorflow package, TensorRT transformation -will be available. An example use can be found in test/test_tftrt.py script +After the installation of tensorflow package, TensorRT transformation will be +available. An example use can be found in test/test_tftrt.py script ## Installing TensorRT 3.0.4 -In order to make use of TensorRT integration, you will need a local installation of TensorRT 3.0.4 from the [NVIDIA Developer website](https://developer.nvidia.com/tensorrt). Due to compiler compatibility, you will need to download and install the TensorRT 3.0.4 tarball for _Ubuntu 14.04_, i.e., **_TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz_**, even if you are using Ubuntu 16.04 or later. - -### Preparing TensorRT installation - -Once you have downloaded TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz, you will need to unpack it to an installation directory, which will be referred to as . Please replace with the full path of actual installation directory you choose in commands below. - -```shell -cd && tar -zxf /path/to/TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz -``` - -After unpacking the binaries, you have several options to use them: - -#### To run TensorFlow as a user without superuser privileges - -For a regular user without any sudo rights, you should add TensorRT to your `$LD_LIBRARY_PATH`: - - ```shell - export LD_LIBRARY_PATH=/TensorRT-3.0.4/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} - ``` - -Then you are ready to use TensorFlow-TensorRT integration. `$LD_LIBRARY_PATH` must contain the path to TensorRT installation for TensorFlow-TensorRT integration to work. If you are using a VirtualEnv-like setup, you can add the command above to your `bin/activate` script or to your `.bashrc` script. 
- -#### To run TensorFlow as a superuser - - When running as a superuser, such as in a container or via sudo, the `$LD_LIBRARY_PATH` approach above may not work. The following is preferred when the user has superuser privileges: - - ```shell - echo "/TensorRT-3.0.4/lib" | sudo tee /etc/ld.so.conf.d/tensorrt304.conf && sudo ldconfig - ``` - - Please ensure that any existing deb package installation of TensorRT is removed before following these instructions to avoid package conflicts. \ No newline at end of file +In order to make use of TensorRT integration, you will need a local installation +of TensorRT 3.0.4 from the [NVIDIA Developer website](https://developer.nvidia.com/tensorrt). +Installation instructions for compatibility with TensorFlow are provided on the +[TensorFlow Installation page](https://www.tensorflow.org/install/install_linux#nvidia_requirements_to_run_tensorflow_with_gpu_support). diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc index e663eed4dd..9c3698e5d1 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc @@ -19,6 +19,12 @@ limitations under the License. namespace tensorflow { namespace tensorrt { +std::shared_ptr +tensorflow::tensorrt::TRTResourceManager::instance() { + static std::shared_ptr instance_(new TRTResourceManager); + return instance_; +} + std::shared_ptr tensorflow::tensorrt::TRTResourceManager::getManager(const string& op_name) { // mutex is held for lookup only. 
Most instantiations where mutex will be held diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h index 5f8ad491d3..bc15b51e05 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h @@ -29,11 +29,7 @@ class TRTResourceManager { TRTResourceManager() = default; public: - static std::shared_ptr instance() { - static std::shared_ptr instance_( - new TRTResourceManager); - return instance_; - } + static std::shared_ptr instance(); // returns a manager for given op, if it doesn't exists it creates one std::shared_ptr getManager(const string& op_name); diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py new file mode 100644 index 0000000000..7a47328762 --- /dev/null +++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py @@ -0,0 +1,156 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Script to test TF-TensorRT integration.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import warnings +import numpy as np + +from tensorflow.contrib import tensorrt as trt +from tensorflow.core.protobuf import config_pb2 as cpb2 +from tensorflow.python.framework import constant_op as cop +from tensorflow.python.framework import dtypes as dtypes +from tensorflow.python.framework import importer as importer +from tensorflow.python.framework import ops as ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops as aops +from tensorflow.python.ops import nn as nn +from tensorflow.python.ops import nn_ops as nn_ops +from tensorflow.python.platform import googletest + + +@test_util.with_c_api +class IntegrationTest(test_util.TensorFlowTestCase): + """Class to test Tensorflow-TensorRT integration.""" + + def setUp(self): + """Setup method.""" + super(IntegrationTest, self).setUp() + warnings.simplefilter("always") + inp_dims = (100, 24, 24, 2) + self._input = np.random.random_sample(inp_dims) + self._original_graph = self.get_simple_graph_def() + self._gpu_options = cpb2.GPUOptions( + per_process_gpu_memory_fraction=0.50) + self._config = cpb2.ConfigProto(gpu_options=self._gpu_options) + self._reference = self.run_graph(self._original_graph, self._input) + + def get_simple_graph_def(self): + """Create a simple graph and return its graph_def.""" + g = ops.Graph() + with g.as_default(): + a = aops.placeholder( + dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input") + e = cop.constant( + [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]], + name="weights", + dtype=dtypes.float32) + conv = nn.conv2d( + input=a, + filter=e, + strides=[1, 2, 2, 1], + padding="SAME", + name="conv") + b = cop.constant( + [4., 1.5, 2., 3., 5., 7.], name="bias", 
dtype=dtypes.float32) + t = nn.bias_add(conv, b, name="biasAdd") + relu = nn.relu(t, "relu") + idty = aops.identity(relu, "ID") + v = nn_ops.max_pool( + idty, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool") + aops.squeeze(v, name="output") + return g.as_graph_def() + + def run_graph(self, gdef, dumm_inp): + """Run given graphdef once.""" + ops.reset_default_graph() + g = ops.Graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=gdef, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + with self.test_session( + graph=g, config=self._config, use_gpu=True, + force_gpu=True) as sess: + val = sess.run(out, {inp: dumm_inp}) + return val + + # Use real data that is representative of the inference dataset + # for calibration. For this test script it is random data. + def run_calibration(self, gdef, dumm_inp): + """Run given calibration graph multiple times.""" + ops.reset_default_graph() + g = ops.Graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=gdef, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + # run over real calibration data here, we are mimicking a calibration + # set of 30 different batches. Use as much calibration data as you want + with self.test_session( + graph=g, config=self._config, use_gpu=True, + force_gpu=True) as sess: + for _ in range(30): + val = sess.run(out, {inp: dumm_inp}) + return val + + def get_trt_graph(self, mode): + """Return trt converted graph.""" + if mode in ["FP32", "FP16", "INT8"]: + return trt.create_inference_graph( + input_graph_def=self._original_graph, + outputs=["output"], + max_batch_size=self._input.shape[0], + max_workspace_size_bytes=1 << 25, + precision_mode=mode, # TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + return None + + def testFP32(self): + """Test FP32 conversion. 
Results should be identical to native case.""" + trt_graph = self.get_trt_graph("FP32") + result = self.run_graph(trt_graph, self._input) + self.assertAllEqual(self._reference, result) + result1 = self.run_graph(trt_graph, self._input) + self.assertAllEqual(result1, result) + + def testFP16(self): + """Test FP16 conversion. Results may be different from native case.""" + trt_graph = self.get_trt_graph("FP16") + result = self.run_graph(trt_graph, self._input) + self.assertAllClose(self._reference, result, rtol=1.e-03) + result1 = self.run_graph(trt_graph, self._input) + self.assertAllEqual(result1, result) + + def testINT8(self): + """Test INT8 conversion. Results may be different from native case.""" + calib_graph = self.get_trt_graph("INT8") + result = self.run_calibration(calib_graph, self._input) + self.assertAllEqual(self._reference, result) + int8_graph = trt.calib_graph_to_infer_graph(calib_graph) + result = self.run_graph(int8_graph, self._input) + self.assertAllClose(self._reference, result, rtol=1.e-03) + result1 = self.run_graph(int8_graph, self._input) + self.assertAllEqual(result1, result) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py index 26793c80bf..9b593fecbb 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py @@ -60,7 +60,7 @@ def clip_covariance( # TODO(allenl): Smarter scaling here so that correlations are preserved when # fiddling with diagonal elements. 
diagonal = array_ops.matrix_diag_part(covariance_matrix) - maximum = math_ops.reduce_max(diagonal, axis=-1, keep_dims=True) + maximum = math_ops.reduce_max(diagonal, axis=-1, keepdims=True) new_diagonal = gen_math_ops.maximum( diagonal, maximum / maximum_variance_ratio) return array_ops.matrix_set_diag( diff --git a/tensorflow/contrib/training/python/training/resample.py b/tensorflow/contrib/training/python/training/resample.py index b16159bc16..7b8332b1d6 100644 --- a/tensorflow/contrib/training/python/training/resample.py +++ b/tensorflow/contrib/training/python/training/resample.py @@ -77,7 +77,7 @@ def resample_at_rate(inputs, rates, scope=None, seed=None, back_prop=False): Args: inputs: A list of tensors, each of which has a shape of `[batch_size, ...]` - rates: A tensor of shape `[batch_size]` contiaining the resampling rates + rates: A tensor of shape `[batch_size]` containing the resampling rates for each input. scope: Scope for the op. seed: Random seed to use. diff --git a/tensorflow/contrib/training/python/training/sampling_ops.py b/tensorflow/contrib/training/python/training/sampling_ops.py index ba888f87dc..7140f2a46d 100644 --- a/tensorflow/contrib/training/python/training/sampling_ops.py +++ b/tensorflow/contrib/training/python/training/sampling_ops.py @@ -123,7 +123,7 @@ def rejection_sample(tensors, batch_size=batch_size, num_threads=queue_threads) - # Queues return a single tensor if the list of enqued tensors is one. Since + # Queues return a single tensor if the list of enqueued tensors is one. Since # we want the type to always be the same, always return a list. 
if isinstance(minibatch, ops.Tensor): minibatch = [minibatch] @@ -312,7 +312,7 @@ def _verify_input(tensor_list, labels, probs_list): """Verify that batched inputs are well-formed.""" checked_probs_list = [] for probs in probs_list: - # Since number of classes shouldn't change at runtime, probalities shape + # Since number of classes shouldn't change at runtime, probabilities shape # should be fully defined. probs.get_shape().assert_is_fully_defined() @@ -407,7 +407,7 @@ def _calculate_acceptance_probabilities(init_probs, target_probs): ``` - A solution for a_i in terms of the other variabes is the following: + A solution for a_i in terms of the other variables is the following: ```a_i = (t_i / p_i) / max_i[t_i / p_i]``` """ # Make list of t_i / p_i. diff --git a/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py b/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py index 99d486b183..39d75a0806 100644 --- a/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py +++ b/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py @@ -876,7 +876,7 @@ class SequenceQueueingStateSaver(object): ]): self._length = array_ops.identity(self._length) - # Only create barrier; enqueu and dequeue operations happen when you + # Only create barrier; enqueue and dequeue operations happen when you # access prefetch_op and next_batch. self._create_barrier() self._scope = scope @@ -1637,7 +1637,7 @@ def _move_sparse_tensor_out_context(input_context, input_sequences, num_unroll): For `key, value` pairs in `input_context` with `SparseTensor` `value` removes them from `input_context` and transforms the `value` into a sequence and - then adding `key`, transformed `value` into `input_seuqences`. + then adding `key`, transformed `value` into `input_sequences`. 
The transformation is done by adding a new first dimension of `value_length` equal to that of the other values in input_sequences` and tiling the `value` every `num_unroll` steps. diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index a2ff29724b..ba1fd41565 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -145,6 +145,7 @@ load( "if_static", ) load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library") load( "//third_party/mkl:build_defs.bzl", "if_mkl", @@ -247,6 +248,15 @@ tf_nano_proto_library( deps = [":protos_all_cc"], ) +proto_library( + name = "example_protos", + srcs = [ + "example/example.proto", + "example/feature.proto", + ], + visibility = ["//visibility:public"], +) + exports_files([ "framework/types.proto", ]) @@ -4066,3 +4076,9 @@ alias( actual = ":mobile_srcs", visibility = ["//visibility:public"], ) + +closure_proto_library( + name = "example_protos_closure", + visibility = ["//visibility:public"], + deps = [":example_protos"], +) diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt new file mode 100644 index 0000000000..145d05de59 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt @@ -0,0 +1,78 @@ +op { + graph_op_name: "ApplyAdaMax" + visibility: HIDDEN + in_arg { + name: "var" + description: <>> x = tf.constant([1, 2, 3]) +>>> y = tf.broadcast_to(x, [3, 3]) +>>> sess.run(y) +array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3]], dtype=int32) +``` +In the above example, the input Tensor with the shape of `[3]` +is broadcast to output Tensor with shape of `[3, 3]`. 
+END +} diff --git a/tensorflow/core/api_def/base_api/api_def_ImageSummary.pbtxt b/tensorflow/core/api_def/base_api/api_def_ImageSummary.pbtxt index 9b00f5b19d..56a3658fa0 100644 --- a/tensorflow/core/api_def/base_api/api_def_ImageSummary.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ImageSummary.pbtxt @@ -61,7 +61,7 @@ build the `tag` of the summary values: generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. The `bad_color` argument is the color to use in the generated images for -non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +non-finite input values. It is a `uint8` 1-D tensor of length `channels`. Each element must be in the range `[0, 255]` (It represents the value of a pixel in the output image). Non-finite values in the input tensor are replaced by this tensor in the output image. The default value is the color diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdaMax.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdaMax.pbtxt new file mode 100644 index 0000000000..a3f2188ba5 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdaMax.pbtxt @@ -0,0 +1,72 @@ +op { + graph_op_name: "ResourceApplyAdaMax" + visibility: HIDDEN + in_arg { + name: "var" + description: <
If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -299,7 +321,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -485,7 +507,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl ## Validate your installation @@ -659,14 +681,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -678,14 +700,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -697,14 +719,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp35-cp35m-linux_x86_64.whl
 
@@ -716,14 +738,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index b3e9616a05..a237d1af54 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-any.whl @@ -524,7 +524,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-any.whl
 
@@ -532,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py2-none-any.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 26287aa3a1..b186758653 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -354,10 +354,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.7.0 on Linux: +for TensorFlow 1.8.0rc0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.7.0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc0-py2-none-any.whl
 
## Validate your installation @@ -454,6 +454,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** + + @@ -475,6 +477,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.8.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.8.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.7.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.6.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
+ @@ -490,6 +493,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.8.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.7.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.6.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
+ + diff --git a/tensorflow/docs_src/mobile/android_build.md b/tensorflow/docs_src/mobile/android_build.md index 08a5fbe41c..c35530061d 100644 --- a/tensorflow/docs_src/mobile/android_build.md +++ b/tensorflow/docs_src/mobile/android_build.md @@ -51,7 +51,8 @@ If you haven't already, do the following two things: // set to 'bazel', 'cmake', 'makefile', 'none' def nativeBuildSystem = 'none' -4. Click the Run button (the green arrow) or use **Run -> Run 'android'** from the top menu. +4. Click the *Run* button (the green arrow) or select *Run > Run 'android'* from the + top menu. You may need to rebuild the project using *Build > Rebuild Project*. If it asks you to use Instant Run, click **Proceed Without Instant Run**. diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md index 411889cb1c..2fea02d861 100644 --- a/tensorflow/docs_src/performance/quantization.md +++ b/tensorflow/docs_src/performance/quantization.md @@ -110,7 +110,7 @@ we've added a separate rewrite for the *eval graph*: ``` # Build eval model -logits = tf.nn.softmax_cross_entropy_with_logits(...) +logits = tf.nn.softmax_cross_entropy_with_logits_v2(...) # Call the eval rewrite which rewrites the graph in-place with # FakeQuantization nodes and fold batchnorm for eval. 
diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index f5a0eb0a20..f7817b06d4 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -400,7 +400,7 @@ diff = -(y_ * tf.log(y)) to the built-in, numerically-stable implementation of softmax cross-entropy: ```python -diff = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=logits) +diff = tf.losses.softmax_cross_entropy(labels=y_, logits=logits) ``` Rerun with the `--debug` flag as follows: diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index aa72cae766..f0dd8def17 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -210,7 +210,7 @@ with tf.device("/device:GPU:0"): # Operations created in this context will be pinned to the GPU. result = tf.matmul(weights, img) ``` -If you are deploying TensorFlow in a @{$deploy/distributed$typical distributed configuration}, +If you are deploying TensorFlow in a @{$distributed$typical distributed configuration}, you might specify the job name and task ID to place variables on a task in the parameter server job (`"/job:ps"`), and the other operations on task in the worker job (`"/job:worker"`): @@ -362,7 +362,7 @@ operations that are needed to compute the result. @{tf.Session.run} requires you to specify a list of **fetches**, which determine the return values, and may be a @{tf.Operation}, a @{tf.Tensor}, or -a [tensor-like type](#tensor-like-objects) such as @{tf.Variable}. These fetches +a [tensor-like type](#tensor-like_objects) such as @{tf.Variable}. 
These fetches determine what **subgraph** of the overall @{tf.Graph} must be executed to produce the result: this is the subgraph that contains all operations named in the fetch list, plus all operations whose outputs are used to compute the value @@ -505,7 +505,7 @@ multiple graphs in the same process. As noted above, TensorFlow provides a "default graph" that is implicitly passed to all API functions in the same context. For many applications, a single graph is sufficient. However, TensorFlow also provides methods for manipulating -the default graph, which can be useful in more advanced used cases. For example: +the default graph, which can be useful in more advanced use cases. For example: * A @{tf.Graph} defines the namespace for @{tf.Operation} objects: each operation in a single graph must have a unique name. TensorFlow will diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index 55ee42dd64..c6ef87c54a 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -485,31 +485,7 @@ portion of the signature. That is, when writing a to expect and how to map them to your model's expected inputs. By contrast, the *output* portion of the signature is determined by the model. - -### Perform the export - -To export your trained Estimator, call -@{tf.estimator.Estimator.export_savedmodel} with the export base path and -the `serving_input_receiver_fn`. - -```py -estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn, - strip_default_attrs=True) -``` - -This method builds a new graph by first calling the -`serving_input_receiver_fn()` to obtain feature `Tensor`s, and then calling -this `Estimator`'s `model_fn()` to generate the model graph based on those -features. It starts a fresh `Session`, and, by default, restores the most recent -checkpoint into it. (A different checkpoint may be passed, if needed.) 
-Finally it creates a time-stamped export directory below the given -`export_dir_base` (i.e., `export_dir_base/`), and writes a -SavedModel into it containing a single `MetaGraphDef` saved from this -Session. - -> Note: It is your responsibility to garbage-collect old exports. -> Otherwise, successive exports will accumulate under `export_dir_base`. - + ### Specify the outputs of a custom model When writing a custom `model_fn`, you must populate the `export_outputs` element @@ -541,6 +517,30 @@ using [`signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`](https://www.tens indicating which `SignatureDef` will be served when an inference request does not specify one. + +### Perform the export + +To export your trained Estimator, call +@{tf.estimator.Estimator.export_savedmodel} with the export base path and +the `serving_input_receiver_fn`. + +```py +estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn, + strip_default_attrs=True) +``` + +This method builds a new graph by first calling the +`serving_input_receiver_fn()` to obtain feature `Tensor`s, and then calling +this `Estimator`'s `model_fn()` to generate the model graph based on those +features. It starts a fresh `Session`, and, by default, restores the most recent +checkpoint into it. (A different checkpoint may be passed, if needed.) +Finally it creates a time-stamped export directory below the given +`export_dir_base` (i.e., `export_dir_base/`), and writes a +SavedModel into it containing a single `MetaGraphDef` saved from this +Session. + +> Note: It is your responsibility to garbage-collect old exports. +> Otherwise, successive exports will accumulate under `export_dir_base`. 
### Serve the exported model locally diff --git a/tensorflow/docs_src/programmers_guide/using_tpu.md b/tensorflow/docs_src/programmers_guide/using_tpu.md index cb0d86fc4c..5e3e49d434 100644 --- a/tensorflow/docs_src/programmers_guide/using_tpu.md +++ b/tensorflow/docs_src/programmers_guide/using_tpu.md @@ -280,8 +280,8 @@ Where `params['batch-size']` will contain the batch size. ### Static shapes and batch size The input pipeline generated by your `input_fn` is run on CPU. So it is mostly -free strict static shape requirements imposed by the XLA/TPU environment. The -one requirement is that the batches of data fed from your input pipeline to +free from the strict static shape requirements imposed by the XLA/TPU environment. +The one requirement is that the batches of data fed from your input pipeline to the TPU have a static shape, as determined by the standard TensorFlow shape inference algorithm. Intermediate tensors are free to have a dynamic shapes. If shape inference has failed, but the shape is known it is possible to diff --git a/tensorflow/docs_src/tutorials/audio_recognition.md b/tensorflow/docs_src/tutorials/audio_recognition.md index 7d79f433c4..372ab47df7 100644 --- a/tensorflow/docs_src/tutorials/audio_recognition.md +++ b/tensorflow/docs_src/tutorials/audio_recognition.md @@ -280,7 +280,7 @@ tool: ``` bazel run tensorflow/examples/wav_to_spectrogram:wav_to_spectrogram -- \ --input_wav=/tmp/speech_dataset/happy/ab00c4b2_nohash_0.wav \ ---output_png=/tmp/spectrogram.png +--output_image=/tmp/spectrogram.png ``` If you open up `/tmp/spectrogram.png` you should see something like this: diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index cadaec391d..37cd2bb139 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -192,8 +192,7 @@ dive deeper into the `tf.layers` code used to create each layer, as well as how to calculate loss, configure the training op, and 
generate predictions. If you're already experienced with CNNs and @{$get_started/custom_estimators$TensorFlow `Estimator`s}, and find the above code intuitive, you may want to skim these sections or just -skip ahead to ["Training and Evaluating the CNN MNIST -Classifier"](#training_and_evaluating_the_cnn_mnist_classifier). +skip ahead to ["Training and Evaluating the CNN MNIST Classifier"](#train_eval_mnist). ### Input Layer @@ -536,8 +535,9 @@ if mode == tf.estimator.ModeKeys.TRAIN: ``` > Note: For a more in-depth look at configuring training ops for Estimator model -> functions, see @{$get_started/custom_estimators#defining_the_training_op_for_the_model$"Defining the training op for the model"} -> in the @{$get_started/custom_estimators$"Creating Estimators in tf.estimator."} tutorial. +> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining the training op for the model"} +> in the @{$get_started/custom_estimators$"Creating Estimators in tf.estimator"} tutorial. + ### Add evaluation metrics @@ -552,7 +552,8 @@ return tf.estimator.EstimatorSpec( mode=mode, loss=loss, eval_metric_ops=eval_metric_ops) ``` -## Training and Evaluating the CNN MNIST Classifier {#training_and_evaluating_the_cnn_mnist_classifier} + +## Training and Evaluating the CNN MNIST Classifier We've coded our MNIST CNN model function; now we're ready to train and evaluate it. @@ -612,9 +613,9 @@ following to `main()`: ```python # Set up logging for predictions - tensors_to_log = {"probabilities": "softmax_tensor"} - logging_hook = tf.train.LoggingTensorHook( - tensors=tensors_to_log, every_n_iter=50) +tensors_to_log = {"probabilities": "softmax_tensor"} +logging_hook = tf.train.LoggingTensorHook( + tensors=tensors_to_log, every_n_iter=50) ``` We store a dict of the tensors we want to log in `tensors_to_log`. 
Each key is a diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py index 14ae7fbf35..b09ee99768 100644 --- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py +++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py @@ -224,7 +224,7 @@ with graph.as_default(): optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss) # Compute the cosine similarity between minibatch examples and all embeddings. - norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) + norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True)) normalized_embeddings = embeddings / norm valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index ec7d9dcc4f..c31ca8b67a 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -21159,7 +21159,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { // generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. // // The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. // Each element must be in the range `[0, 255]` (It represents the value of a // pixel in the output image). Non-finite values in the input tensor are // replaced by this tensor in the output image. 
The default value is the color diff --git a/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java b/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java index 489e95c310..3948991c84 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java +++ b/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java @@ -101,6 +101,7 @@ public class LabelImage { b.constant("mean", mean)), b.constant("scale", scale)); try (Session s = new Session(g)) { + // Generally, there may be multiple output tensors, all of them must be closed to prevent resource leaks. return s.runner().fetch(output.op().name()).run().get(0).expect(Float.class); } } @@ -110,6 +111,7 @@ public class LabelImage { try (Graph g = new Graph()) { g.importGraphDef(graphDef); try (Session s = new Session(g); + // Generally, there may be multiple output tensors, all of them must be closed to prevent resource leaks. Tensor result = s.runner().feed("input", image).fetch("output").run().get(0).expect(Float.class)) { final long[] rshape = result.shape(); diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 9dc03d7cdb..8e7f0cadad 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1946,7 +1946,8 @@ py_library( ":array_ops", ":constant_op", ":dtypes", - ":linalg_ops", + ":linalg_ops_gen", + ":linalg_ops_impl", ":math_ops", ":nn_ops", ":random_ops", @@ -1997,7 +1998,22 @@ py_library( ":array_ops", ":dtypes", ":framework_ops", + ":functional_ops", ":linalg_ops_gen", + ":linalg_ops_impl", + ":math_ops", + "//third_party/py/numpy", + ], +) + +py_library( + name = "linalg_ops_impl", + srcs = ["ops/linalg_ops_impl.py"], + srcs_version = "PY2AND3", + deps = [ + ":array_ops", + ":dtypes", + ":framework_ops", ":math_ops", "//third_party/py/numpy", ], @@ -3493,6 +3509,7 @@ tf_py_wrap_cc( "//tensorflow/core/profiler/internal:print_model_analysis", "//tensorflow/tools/graph_transforms:transform_graph_lib", 
"//tensorflow/python/eager:pywrap_tfe_lib", + "//tensorflow/python/eager:python_eager_op_gen", "//util/python:python_headers", ] + (tf_additional_lib_deps() + tf_additional_plugin_deps() + diff --git a/tensorflow/python/debug/cli/readline_ui.py b/tensorflow/python/debug/cli/readline_ui.py index 151638789f..3296e45d07 100644 --- a/tensorflow/python/debug/cli/readline_ui.py +++ b/tensorflow/python/debug/cli/readline_ui.py @@ -19,6 +19,8 @@ from __future__ import print_function import readline +import six + from tensorflow.python.debug.cli import base_ui from tensorflow.python.debug.cli import debugger_cli_common @@ -39,11 +41,7 @@ class ReadlineUI(base_ui.BaseUI): readline.set_completer(self._readline_complete) readline.parse_and_bind("tab: complete") - # For Python 2-3 compatibility. - try: - self._input = raw_input - except NameError: - self._input = input + self._input = six.moves.input def _readline_complete(self, text, state): context, prefix, except_last_word = self._analyze_tab_complete_input(text) diff --git a/tensorflow/python/debug/wrappers/grpc_wrapper.py b/tensorflow/python/debug/wrappers/grpc_wrapper.py index fb9494f576..1f9c8fa5a9 100644 --- a/tensorflow/python/debug/wrappers/grpc_wrapper.py +++ b/tensorflow/python/debug/wrappers/grpc_wrapper.py @@ -21,6 +21,8 @@ import signal import sys import traceback +import six + # Google-internal import(s). from tensorflow.python.debug.lib import common from tensorflow.python.debug.wrappers import framework @@ -140,14 +142,9 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession): def _signal_handler(unused_signal, unused_frame): - try: - input_func = raw_input - except NameError: - # Python 3 does not have raw_input. - input_func = input - while True: - response = input_func("\nSIGINT received. Quit program? (Y/n): ").strip() + response = six.moves.input( + "\nSIGINT received. Quit program? 
(Y/n): ").strip() if response in ("", "Y", "y"): sys.exit(0) elif response in ("N", "n"): diff --git a/tensorflow/python/debug/wrappers/hooks.py b/tensorflow/python/debug/wrappers/hooks.py index 6705cd31e2..5e4604fda4 100644 --- a/tensorflow/python/debug/wrappers/hooks.py +++ b/tensorflow/python/debug/wrappers/hooks.py @@ -31,15 +31,18 @@ from tensorflow.python.training import session_run_hook class LocalCLIDebugHook(session_run_hook.SessionRunHook): """Command-line-interface debugger hook. - Can be used as a monitor/hook for `tf.train.MonitoredSession`s and - `tf.contrib.learn`'s `Estimator`s and `Experiment`s. + Can be used as a hook for `tf.train.MonitoredSession`s and + `tf.estimator.Estimator`s. Provides a substitute for + `tfdbg.LocalCLIDebugWrapperSession` in cases where the session is not directly + available. """ def __init__(self, ui_type="curses", dump_root=None, thread_name_filter=None): """Create a local debugger command-line interface (CLI) hook. Args: - ui_type: (str) user-interface type. + ui_type: (`str`) requested user-interface type. Currently supported: + (curses | readline). dump_root: (`str`) optional path to the dump root directory. Must be a directory that does not exist or an empty directory. If the directory does not exist, it will be created by the debugger core during debug @@ -153,8 +156,8 @@ class LocalCLIDebugHook(session_run_hook.SessionRunHook): class DumpingDebugHook(session_run_hook.SessionRunHook): """A debugger hook that dumps debug data to filesystem. - Can be used as a monitor/hook for `tf.train.MonitoredSession`s and - `tf.contrib.learn`'s `Estimator`s and `Experiment`s. + Can be used as a hook for `tf.train.MonitoredSession`s and + `tf.estimator.Estimator`s. """ def __init__(self, @@ -229,8 +232,8 @@ class GrpcDebugHook(session_run_hook.SessionRunHook): When the arguments of debug_utils.watch_graph changes, strongly consider changing arguments here too so that features are available to tflearn users. 
- Can be used as a monitor/hook for `tf.train.MonitoredSession`s and - `tf.contrib.learn`'s `Estimator`s and `Experiment`s. + Can be used as a hook for `tf.train.MonitoredSession`s and + `tf.estimator.Estimator`s. """ def __init__(self, diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index c365ea8b4a..efa4bdf598 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -263,9 +263,12 @@ def _check_dense_labels_match_logits_and_reshape( if (dim1 is not None) and (dim1 != expected_labels_dimension): raise ValueError( 'Mismatched label shape. ' - 'Classifier configured with n_classes=%s. Received %s. ' - 'Suggested Fix: check your n_classes argument to the estimator ' - 'and/or the shape of your label.' % + 'Expected labels dimension=%s. Received %s. ' + 'Suggested Fix: ' + 'If your classifier expects one-hot encoding label, ' + 'check your n_classes argument to the estimator ' + 'and/or the shape of your label. ' + 'Otherwise, check the shape of your label.' % (expected_labels_dimension, dim1)) expected_labels_shape = array_ops.concat( [logits_shape[:-1], [expected_labels_dimension]], axis=0) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 351fcb6423..2f1212d5a2 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -207,7 +207,8 @@ class Estimator(object): else: self._session_config = self._config.session_config - self._device_fn = _get_replica_device_setter(self._config) + self._device_fn = self._config.device_fn or \ + _get_replica_device_setter(self._config) if model_fn is None: raise ValueError('model_fn must be provided to Estimator.') @@ -716,7 +717,7 @@ class Estimator(object): batch_length = batch_length or value.shape[0] if value.shape[0] != batch_length: raise ValueError('Batch length of predictions should be same. 
%s has ' - 'different batch length then others.' % key) + 'different batch length than others.' % key) return batch_length def _extract_keys(self, predictions, predict_keys): diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index dab442aeda..8162b249f1 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -27,11 +27,13 @@ import six from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib +from tensorflow.python.estimator import util from tensorflow.python.util import compat_internal from tensorflow.python.util.tf_export import tf_export _USE_DEFAULT = object() +_VALID_DEVICE_FN_ARGS = set(['op']) # A list of the property names in RunConfig that the user is allowed to change. _DEFAULT_REPLACEABLE_LIST = [ @@ -44,7 +46,8 @@ _DEFAULT_REPLACEABLE_LIST = [ 'keep_checkpoint_max', 'keep_checkpoint_every_n_hours', 'log_step_count_steps', - 'train_distribute' + 'train_distribute', + 'device_fn' ] _SAVE_CKPT_ERR = ( @@ -279,6 +282,11 @@ def _validate_properties(run_config): _validate('tf_random_seed', lambda seed: isinstance(seed, six.integer_types), message='tf_random_seed must be integer.') + _validate('device_fn', lambda device_fn: six.callable(device_fn) and + set(util.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS, + message='device_fn must be callable with exactly' + ' one argument "op".') + class TaskType(object): MASTER = 'master' @@ -302,7 +310,8 @@ class RunConfig(object): keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000, log_step_count_steps=100, - train_distribute=None): + train_distribute=None, + device_fn=None): """Constructs a RunConfig. All distributed training related properties `cluster_spec`, `is_chief`, @@ -430,6 +439,10 @@ class RunConfig(object): `tf.contrib.distribute.DistributionStrategy`. 
If specified, then Estimator will distribute the user's model during training, according to the policy specified by that strategy. + device_fn: A callable invoked for every `Operation` that takes the + `Operation` and returns the device string. If `None`, defaults to + the device function returned by `tf.train.replica_device_setter` + with round-robin strategy. Raises: ValueError: If both `save_checkpoints_steps` and `save_checkpoints_secs` @@ -466,7 +479,8 @@ class RunConfig(object): keep_checkpoint_max=keep_checkpoint_max, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, log_step_count_steps=log_step_count_steps, - train_distribute=train_distribute) + train_distribute=train_distribute, + device_fn=device_fn) self._init_distributed_setting_from_environment_var(tf_config) @@ -568,6 +582,16 @@ class RunConfig(object): def cluster_spec(self): return self._cluster_spec + @property + def device_fn(self): + """Returns the device_fn. + + If device_fn is not `None`, it overrides the default + device function used in `Estimator`. + Otherwise the default one is used. + """ + return self._device_fn + @property def evaluation_master(self): return self._evaluation_master @@ -697,7 +721,8 @@ class RunConfig(object): - `keep_checkpoint_max`, - `keep_checkpoint_every_n_hours`, - `log_step_count_steps`, - - `train_distribute`. + - `train_distribute`, + - `device_fn`. In addition, either `save_checkpoints_steps` or `save_checkpoints_secs` can be set (should not be both). 
diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py index a3eef4c53f..c8b12605e1 100644 --- a/tensorflow/python/estimator/run_config_test.py +++ b/tensorflow/python/estimator/run_config_test.py @@ -42,6 +42,7 @@ _SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto' _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0' _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0' _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer' +_DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument "op".' _ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.' _ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.' _INVALID_TASK_TYPE_FOR_EVAL_MASTER = ( @@ -83,6 +84,7 @@ class RunConfigTest(test.TestCase): self.assertEqual(5, config.keep_checkpoint_max) self.assertEqual(10000, config.keep_checkpoint_every_n_hours) self.assertIsNone(config.service) + self.assertIsNone(config.device_fn) def test_model_dir(self): empty_config = run_config_lib.RunConfig() @@ -93,6 +95,7 @@ class RunConfigTest(test.TestCase): def test_replace_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) + device_fn = lambda op: "/cpu:0" config = run_config_lib.RunConfig().replace( tf_random_seed=11, @@ -100,13 +103,15 @@ class RunConfigTest(test.TestCase): save_checkpoints_secs=14, session_config=session_config, keep_checkpoint_max=16, - keep_checkpoint_every_n_hours=17) + keep_checkpoint_every_n_hours=17, + device_fn=device_fn) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) self.assertEqual(session_config, config.session_config) self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) + self.assertEqual(device_fn, config.device_fn) def test_replace_none_value(self): 
config = run_config_lib.RunConfig().replace( @@ -117,7 +122,8 @@ class RunConfigTest(test.TestCase): save_checkpoints_steps=None, session_config=None, keep_checkpoint_max=None, - keep_checkpoint_every_n_hours=None) + keep_checkpoint_every_n_hours=None, + device_fn=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) self.assertIsNone(config.save_summary_steps) @@ -126,6 +132,7 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.session_config) self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) + self.assertIsNone(config.device_fn) def test_replace_with_disallowallowed_properties(self): config = run_config_lib.RunConfig() @@ -166,9 +173,12 @@ class RunConfigTest(test.TestCase): config.replace(keep_checkpoint_every_n_hours=0) with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR): config.replace(tf_random_seed=1.0) + with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR): + config.replace(device_fn=lambda x, y: 0) def test_init_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) + device_fn = lambda op: "/cpu:0" config = run_config_lib.RunConfig( tf_random_seed=11, @@ -176,13 +186,15 @@ class RunConfigTest(test.TestCase): save_checkpoints_secs=14, session_config=session_config, keep_checkpoint_max=16, - keep_checkpoint_every_n_hours=17) + keep_checkpoint_every_n_hours=17, + device_fn=device_fn) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) self.assertEqual(session_config, config.session_config) self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) + self.assertEqual(device_fn, config.device_fn) def test_init_none_value(self): config = run_config_lib.RunConfig( @@ -193,7 +205,8 @@ class RunConfigTest(test.TestCase): save_checkpoints_steps=None, session_config=None, 
keep_checkpoint_max=None, - keep_checkpoint_every_n_hours=None) + keep_checkpoint_every_n_hours=None, + device_fn=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) self.assertIsNone(config.save_summary_steps) @@ -202,6 +215,7 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.session_config) self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) + self.assertIsNone(config.device_fn) def test_init_invalid_values(self): with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR): @@ -220,6 +234,8 @@ class RunConfigTest(test.TestCase): run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0) with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR): run_config_lib.RunConfig(tf_random_seed=1.0) + with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR): + run_config_lib.RunConfig(device_fn=lambda x: "/cpu:0") class RunConfigDistributedSettingTest(test.TestCase): diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index a7c4eabcb2..c16c3cda48 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -162,7 +162,6 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import checkpoint_utils from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export -from tensorflow.python.util.tf_export import tf_export def _internal_input_layer(features, diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py index 807582bd7e..7f9ef53457 100644 --- a/tensorflow/python/framework/dtypes.py +++ b/tensorflow/python/framework/dtypes.py @@ -700,11 +700,13 @@ def as_dtype(type_value): if type_value.type == np.string_ or type_value.type == np.unicode_: return string - for key, val in _NP_TO_TF: - try: - if key == type_value: - return val - except TypeError as e: 
- raise TypeError("Cannot convert {} to a dtype. {}".format(type_value, e)) + if isinstance(type_value, (type, np.dtype)): + for key, val in _NP_TO_TF: + try: + if key == type_value: + return val + except TypeError as e: + raise TypeError("Cannot convert {} to a dtype. {}".format( + type_value, e)) raise TypeError("Cannot convert value %r to a TensorFlow DType." % type_value) diff --git a/tensorflow/python/framework/graph_util_impl.py b/tensorflow/python/framework/graph_util_impl.py index 910364364c..394fac6c85 100644 --- a/tensorflow/python/framework/graph_util_impl.py +++ b/tensorflow/python/framework/graph_util_impl.py @@ -285,7 +285,7 @@ def convert_variables_to_constants(sess, output_graph_def.node.extend([output_node]) output_graph_def.library.CopyFrom(inference_graph.library) - print("Converted %d variables to const ops." % how_many_converted) + logging.info("Converted %d variables to const ops.", how_many_converted) return output_graph_def diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py index b618152b02..2dafb94ba7 100644 --- a/tensorflow/python/framework/graph_util_test.py +++ b/tensorflow/python/framework/graph_util_test.py @@ -209,7 +209,7 @@ class DeviceFunctionsTest(test.TestCase): defun_node, 2.0, name="output_node") with session.Session() as sess: - init = variables.initialize_variables([variable_node]) + init = variables.variables_initializer([variable_node]) sess.run(init) output = sess.run(output_node) self.assertNear(4.0, output, 0.00001) diff --git a/tensorflow/python/framework/load_library.py b/tensorflow/python/framework/load_library.py index 535c6017f5..9a8477debb 100644 --- a/tensorflow/python/framework/load_library.py +++ b/tensorflow/python/framework/load_library.py @@ -58,7 +58,7 @@ def load_op_library(library_filename): op_list_str = py_tf.TF_GetOpList(lib_handle) op_list = op_def_pb2.OpList() op_list.ParseFromString(compat.as_bytes(op_list_str)) - wrappers = 
py_tf.GetPythonWrappers(op_list_str) + wrappers = py_tf.GetEagerPythonWrappers(op_list_str) # Delete the library handle to release any memory held in C # that are no longer needed. diff --git a/tensorflow/python/framework/python_op_gen.i b/tensorflow/python/framework/python_op_gen.i index 26ec4e8e66..efcce2f209 100644 --- a/tensorflow/python/framework/python_op_gen.i +++ b/tensorflow/python/framework/python_op_gen.i @@ -16,10 +16,10 @@ limitations under the License. %include "tensorflow/python/platform/base.i" %{ -#include "tensorflow/python/framework/python_op_gen.h" +#include "tensorflow/python/eager/python_eager_op_gen.h" %} -// Input typemap for GetPythonWrappers. +// Input typemap for GetEagerPythonWrappers. // Accepts a python object of 'bytes' type, and converts it to // a const char* pointer and size_t length. The default typemap // going from python bytes to const char* tries to decode the @@ -37,5 +37,5 @@ limitations under the License. %ignoreall; -%unignore tensorflow::GetPythonWrappers; -%include "tensorflow/python/framework/python_op_gen.h" +%unignore tensorflow::GetEagerPythonWrappers; +%include "tensorflow/python/eager/python_eager_op_gen.h" diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index f954b9d6c7..5a8bc43727 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -1014,6 +1014,8 @@ class TensorFlowTestCase(googletest.TestCase): config.graph_options.optimizer_options.opt_level = -1 config.graph_options.rewrite_options.constant_folding = ( rewriter_config_pb2.RewriterConfig.OFF) + config.graph_options.rewrite_options.arithmetic_optimization = ( + rewriter_config_pb2.RewriterConfig.OFF) return config if graph is None: diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 5a84b16a23..e3dd4b0bdf 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ 
b/tensorflow/python/grappler/layout_optimizer_test.py @@ -476,7 +476,7 @@ class LayoutOptimizerTest(test.TestCase): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = _two_layer_model(x) - reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keep_dims=True) + reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keepdims=True) squeeze = array_ops.squeeze(reduce_sum, axis=[1, 2]) output = array_ops.identity(squeeze) @@ -506,7 +506,7 @@ class LayoutOptimizerTest(test.TestCase): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = _two_layer_model(x) - reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keep_dims=True) + reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keepdims=True) squeeze = array_ops.squeeze(reduce_sum, axis=[0, 1, 2]) output = array_ops.identity(squeeze) @@ -623,7 +623,7 @@ class LayoutOptimizerTest(test.TestCase): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = _two_layer_model(x) - reduce_sum = math_ops.reduce_sum(conv, axis=[3], keep_dims=True) + reduce_sum = math_ops.reduce_sum(conv, axis=[3], keepdims=True) output = array_ops.identity(reduce_sum) with session.Session(config=_get_config(False)) as sess: @@ -653,7 +653,7 @@ class LayoutOptimizerTest(test.TestCase): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = _two_layer_model(x) - reduce_sum = math_ops.reduce_sum(conv, axis=[2], keep_dims=True) + reduce_sum = math_ops.reduce_sum(conv, axis=[2], keepdims=True) output = array_ops.identity(reduce_sum) with session.Session(config=_get_config(False)) as sess: @@ -682,7 +682,7 @@ class LayoutOptimizerTest(test.TestCase): random_seed.set_random_seed(0) x = random_ops.truncated_normal([1, 784], seed=0) conv = _two_layer_model(x) - reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keep_dims=True) + reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keepdims=True) output = 
array_ops.identity(reduce_sum) with session.Session(config=_get_config(False)) as sess: diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index 81a4d2f820..449410fe08 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -3448,7 +3448,7 @@ def categorical_crossentropy(target, output, from_logits=False): Returns: Output tensor. """ - # Note: nn.softmax_cross_entropy_with_logits + # Note: nn.softmax_cross_entropy_with_logits_v2 # expects logits, Keras expects probabilities. if not from_logits: # scale preds so that the class probas of each sample sum to 1 @@ -3512,7 +3512,7 @@ def binary_crossentropy(target, output, from_logits=False): Returns: A tensor. """ - # Note: nn.softmax_cross_entropy_with_logits + # Note: nn.sigmoid_cross_entropy_with_logits # expects logits, Keras expects probabilities. if not from_logits: # transform back to logits diff --git a/tensorflow/python/keras/_impl/keras/layers/normalization.py b/tensorflow/python/keras/_impl/keras/layers/normalization.py index 5462a95d7d..c16fc07fb4 100644 --- a/tensorflow/python/keras/_impl/keras/layers/normalization.py +++ b/tensorflow/python/keras/_impl/keras/layers/normalization.py @@ -593,9 +593,9 @@ class BatchNormalization(Layer): # used during evaluation, it is more efficient to just update in one # step and should not make a significant difference in the result. 
new_mean = math_ops.reduce_mean(new_mean, - axis=1, keep_dims=True) + axis=1, keepdims=True) new_variance = math_ops.reduce_mean(new_variance, - axis=1, keep_dims=True) + axis=1, keepdims=True) def _do_update(var, value): if in_eager_mode and not self.trainable: diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index ebbec39cf3..c03c514699 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -917,6 +917,20 @@ tf_py_test( ], ) +tf_py_test( + name = "string_strip_op_test", + size = "small", + srcs = ["string_strip_op_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:errors", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:string_ops", + ], +) + tf_py_test( name = "substr_op_test", size = "small", @@ -1195,6 +1209,18 @@ cuda_py_test( ], ) +cuda_py_test( + name = "broadcast_to_ops_test", + size = "small", + srcs = ["broadcast_to_ops_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + ], +) + cuda_py_test( name = "inplace_ops_test", size = "small", diff --git a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py new file mode 100644 index 0000000000..6a1bd958ba --- /dev/null +++ b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py @@ -0,0 +1,85 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for broadcast_to ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test as test_lib + + +class BroadcastToTest(test_util.TensorFlowTestCase): + + def testBroadcastToBasic(self): + for dtype in [np.uint8, np.uint16, np.int8, np.int16, np.int32, np.int64]: + with self.test_session(use_gpu=True): + x = np.array([1, 2, 3], dtype=dtype) + v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3]) + v_np = np.broadcast_to(x, [3, 3]) + self.assertAllEqual(v_tf.eval(), v_np) + + def testBroadcastToString(self): + with self.test_session(use_gpu=True): + x = np.array([b"1", b"2", b"3"]) + v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3]) + v_np = np.broadcast_to(x, [3, 3]) + self.assertAllEqual(v_tf.eval(), v_np) + + def testBroadcastToBool(self): + with self.test_session(use_gpu=True): + x = np.array([True, False, True], dtype=np.bool_) + v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3]) + v_np = np.broadcast_to(x, [3, 3]) + self.assertAllEqual(v_tf.eval(), v_np) + + def testBroadcastToShape(self): + for input_dim in range(1, 6): + for output_dim in range(input_dim, 6): + with self.test_session(use_gpu=True): + input_shape 
= [2] * input_dim + output_shape = [2] * output_dim + x = np.array(np.random.randint(5, size=input_shape), dtype=np.int32) + v_tf = array_ops.broadcast_to(constant_op.constant(x), output_shape) + v_np = np.broadcast_to(x, output_shape) + self.assertAllEqual(v_tf.eval(), v_np) + + def testBroadcastToScalar(self): + with self.test_session(use_gpu=True): + x = np.array(1, dtype=np.int32) + v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3]) + v_np = np.broadcast_to(x, [3, 3]) + self.assertAllEqual(v_tf.eval(), v_np) + + def testBroadcastToShapeTypeAndInference(self): + for dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(use_gpu=True): + x = np.array([1, 2, 3]) + v_tf = array_ops.broadcast_to( + constant_op.constant(x), + constant_op.constant([3, 3], dtype=dtype)) + shape = v_tf.get_shape().as_list() + v_np = np.broadcast_to(x, [3, 3]) + self.assertAllEqual(v_tf.eval(), v_np) + # check shape inference when shape input is constant + self.assertAllEqual(shape, v_np.shape) + +if __name__ == "__main__": + test_lib.main() diff --git a/tensorflow/python/kernel_tests/confusion_matrix_test.py b/tensorflow/python/kernel_tests/confusion_matrix_test.py index 670a625f0f..79e419867d 100644 --- a/tensorflow/python/kernel_tests/confusion_matrix_test.py +++ b/tensorflow/python/kernel_tests/confusion_matrix_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -104,11 +105,7 @@ class ConfusionMatrixTest(test.TestCase): d, l, cm_out = sess.run([data, lab, cm], {m_neg: 0.0, m_pos: 1.0, s: 1.0}) truth = np.zeros([2, 2], dtype=np_dtype) - try: - range_builder = xrange - except NameError: # In Python 3. 
- range_builder = range - for i in range_builder(len(d)): + for i in xrange(len(d)): truth[l[i], d[i]] += 1 self.assertEqual(cm_out.dtype, np_dtype) diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 749313b00d..107ee37fab 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -65,6 +65,11 @@ class ConstantTest(test.TestCase): self._testCpu(x) self._testGpu(x) + def testInvalidDType(self): + # Test case for GitHub issue 18474 + with self.assertRaises(TypeError): + constant_op.constant(dtypes_lib.string, "[,]") + def testBFloat16(self): bfloat16 = dtypes_lib.bfloat16.as_numpy_dtype self._testAll(np.arange(-15, 15).reshape([2, 3, 5]).astype(bfloat16)) diff --git a/tensorflow/python/kernel_tests/conv3d_transpose_test.py b/tensorflow/python/kernel_tests/conv3d_transpose_test.py index a8b3af5096..8973a450fa 100644 --- a/tensorflow/python/kernel_tests/conv3d_transpose_test.py +++ b/tensorflow/python/kernel_tests/conv3d_transpose_test.py @@ -119,6 +119,18 @@ class Conv3DTransposeTest(test.TestCase): target = 3.0 self.assertAllClose(target, value[n, d, h, w, k]) + def testConv3DTransposeShapeMismatch(self): + # Test case for GitHub issue 18460 + x_shape = [2, 2, 3, 4, 3] + f_shape = [3, 3, 3, 2, 2] + y_shape = [2, 2, 6, 8, 6] + strides = [1, 1, 2, 2, 2] + np.random.seed(1) + x_value = np.random.random_sample(x_shape).astype(np.float64) + f_value = np.random.random_sample(f_shape).astype(np.float64) + nn_ops.conv3d_transpose( + x_value, f_value, y_shape, strides, data_format='NCDHW') + def testConv3DTransposeValid(self): with self.test_session(): strides = [1, 2, 2, 2, 1] diff --git a/tensorflow/python/kernel_tests/manip_ops_test.py b/tensorflow/python/kernel_tests/manip_ops_test.py index b8200ac0cb..f31426713c 100644 --- a/tensorflow/python/kernel_tests/manip_ops_test.py +++ b/tensorflow/python/kernel_tests/manip_ops_test.py @@ -20,8 
+20,10 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import manip_ops from tensorflow.python.platform import test as test_lib @@ -88,41 +90,78 @@ class RollTest(test_util.TensorFlowTestCase): x = np.random.rand(3, 2, 1, 1).astype(t) self._testAll(x + 1j * x, [2, 1, 1, 0], [0, 3, 1, 2]) + def testNegativeAxis(self): + self._testAll(np.random.randint(-100, 100, (5)).astype(np.int32), 3, -1) + self._testAll(np.random.randint(-100, 100, (4, 4)).astype(np.int32), 3, -2) + # Make sure a negative axis satisfies 0 <= axis + dims < dims + with self.test_session(): + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "is out of range"): + manip_ops.roll(np.random.randint(-100, 100, (4, 4)).astype(np.int32), + 3, -10).eval() + + def testInvalidInputShape(self): + # The input should be 1-D or higher, checked in shape function. + with self.assertRaisesRegexp( + ValueError, "Shape must be at least rank 1 but is rank 0"): + manip_ops.roll(7, 1, 0) + def testRollInputMustVectorHigherRaises(self): - tensor = 7 + # The input should be 1-D or higher, checked in kernel. + tensor = array_ops.placeholder(dtype=dtypes.int32) shift = 1 axis = 0 with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "input must be 1-D or higher"): - manip_ops.roll(tensor, shift, axis).eval() + manip_ops.roll(tensor, shift, axis).eval(feed_dict={tensor: 7}) + + def testInvalidAxisShape(self): + # The axis should be a scalar or 1-D, checked in shape function. 
+ with self.assertRaisesRegexp( + ValueError, "Shape must be at most rank 1 but is rank 2"): + manip_ops.roll([[1, 2], [3, 4]], 1, [[0, 1]]) def testRollAxisMustBeScalarOrVectorRaises(self): + # The axis should be a scalar or 1-D, checked in kernel. tensor = [[1, 2], [3, 4]] shift = 1 - axis = [[0, 1]] + axis = array_ops.placeholder(dtype=dtypes.int32) with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "axis must be a scalar or a 1-D vector"): - manip_ops.roll(tensor, shift, axis).eval() + manip_ops.roll(tensor, shift, axis).eval(feed_dict={axis: [[0, 1]]}) + + def testInvalidShiftShape(self): + # The shift should be a scalar or 1-D, checked in shape function. + with self.assertRaisesRegexp( + ValueError, "Shape must be at most rank 1 but is rank 2"): + manip_ops.roll([[1, 2], [3, 4]], [[0, 1]], 1) def testRollShiftMustBeScalarOrVectorRaises(self): + # The shift should be a scalar or 1-D, checked in kernel. tensor = [[1, 2], [3, 4]] - shift = [[0, 1]] + shift = array_ops.placeholder(dtype=dtypes.int32) axis = 1 with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "shift must be a scalar or a 1-D vector"): - manip_ops.roll(tensor, shift, axis).eval() + manip_ops.roll(tensor, shift, axis).eval(feed_dict={shift: [[0, 1]]}) + + def testInvalidShiftAndAxisNotEqualShape(self): + # The shift and axis must be same size, checked in shape function. + with self.assertRaisesRegexp(ValueError, "both shapes must be equal"): + manip_ops.roll([[1, 2], [3, 4]], [1], [0, 1]) def testRollShiftAndAxisMustBeSameSizeRaises(self): + # The shift and axis must be same size, checked in kernel. 
tensor = [[1, 2], [3, 4]] - shift = [1] + shift = array_ops.placeholder(dtype=dtypes.int32) axis = [0, 1] with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "shift and axis must have the same size"): - manip_ops.roll(tensor, shift, axis).eval() + manip_ops.roll(tensor, shift, axis).eval(feed_dict={shift: [1]}) def testRollAxisOutOfRangeRaises(self): tensor = [1, 2] diff --git a/tensorflow/python/kernel_tests/norm_op_test.py b/tensorflow/python/kernel_tests/norm_op_test.py index d85512fae6..3f71b326a2 100644 --- a/tensorflow/python/kernel_tests/norm_op_test.py +++ b/tensorflow/python/kernel_tests/norm_op_test.py @@ -37,17 +37,17 @@ class NormOpTest(test_lib.TestCase): def testBadOrder(self): matrix = [[0., 1.], [2., 3.]] - for ord_ in "foo", -7, -1.1, 0: + for ord_ in "fro", -7, -1.1, 0: with self.assertRaisesRegexp(ValueError, "'ord' must be a supported vector norm"): - linalg_ops.norm(matrix, ord="fro") + linalg_ops.norm(matrix, ord=ord_) - for ord_ in "foo", -7, -1.1, 0: + for ord_ in "fro", -7, -1.1, 0: with self.assertRaisesRegexp(ValueError, "'ord' must be a supported vector norm"): linalg_ops.norm(matrix, ord=ord_, axis=-1) - for ord_ in 1.1, 2: + for ord_ in "foo", -7, -1.1, 1.1: with self.assertRaisesRegexp(ValueError, "'ord' must be a supported matrix norm"): linalg_ops.norm(matrix, ord=ord_, axis=[-2, -1]) @@ -69,14 +69,14 @@ def _GetNormOpTest(dtype_, shape_, ord_, axis_, keep_dims_, use_static_shape_): if use_static_shape_: tf_matrix = constant_op.constant(matrix) tf_norm = linalg_ops.norm( - tf_matrix, ord=ord_, axis=axis_, keep_dims=keep_dims_) + tf_matrix, ord=ord_, axis=axis_, keepdims=keep_dims_) tf_norm_val = sess.run(tf_norm) else: tf_matrix = array_ops.placeholder(dtype_) tf_norm = linalg_ops.norm( - tf_matrix, ord=ord_, axis=axis_, keep_dims=keep_dims_) + tf_matrix, ord=ord_, axis=axis_, keepdims=keep_dims_) tf_norm_val = sess.run(tf_norm, feed_dict={tf_matrix: matrix}) - self.assertAllClose(np_norm, 
tf_norm_val) + self.assertAllClose(np_norm, tf_norm_val, rtol=1e-5, atol=1e-5) def Test(self): is_matrix_norm = (isinstance(axis_, tuple) or @@ -85,8 +85,6 @@ def _GetNormOpTest(dtype_, shape_, ord_, axis_, keep_dims_, use_static_shape_): if ((not is_matrix_norm and ord_ == "fro") or (is_matrix_norm and is_fancy_p_norm)): self.skipTest("Not supported by neither numpy.linalg.norm nor tf.norm") - if is_matrix_norm and ord_ == 2: - self.skipTest("Not supported by tf.norm") if ord_ == 'euclidean' or (axis_ is None and len(shape) > 2): self.skipTest("Not supported by numpy.linalg.norm") matrix = np.random.randn(*shape_).astype(dtype_) diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py index 5b508b7c0e..b9f44d728a 100644 --- a/tensorflow/python/kernel_tests/py_func_test.py +++ b/tensorflow/python/kernel_tests/py_func_test.py @@ -52,6 +52,38 @@ class PyFuncTest(test.TestCase): """Encapsulates tests for py_func and eager_py_func.""" # ----- Tests for py_func ----- + def testRealDataTypes(self): + def sum_func(x, y): + return x + y + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.uint8, dtypes.int8, dtypes.uint16, dtypes.int16, + dtypes.int32, dtypes.int64]: + with self.test_session(): + x = constant_op.constant(1, dtype=dtype) + y = constant_op.constant(2, dtype=dtype) + z = self.evaluate(script_ops.py_func(sum_func, [x, y], dtype)) + self.assertEqual(z, 3) + + def testComplexDataTypes(self): + def sub_func(x, y): + return x - y + for dtype in [dtypes.complex64, dtypes.complex128]: + with self.test_session(): + x = constant_op.constant(1 + 1j, dtype=dtype) + y = constant_op.constant(2 - 2j, dtype=dtype) + z = self.evaluate(script_ops.py_func(sub_func, [x, y], dtype)) + self.assertEqual(z, -1 + 3j) + + def testBoolDataTypes(self): + def and_func(x, y): + return x and y + dtype = dtypes.bool + with self.test_session(): + x = constant_op.constant(True, dtype=dtype) + y = 
constant_op.constant(False, dtype=dtype) + z = self.evaluate(script_ops.py_func(and_func, [x, y], dtype)) + self.assertEqual(z, False) + def testSingleType(self): with self.test_session(): x = constant_op.constant(1.0, dtypes.float32) diff --git a/tensorflow/python/kernel_tests/random/multinomial_op_test.py b/tensorflow/python/kernel_tests/random/multinomial_op_test.py index a9dc7b7de0..051c7d86bf 100644 --- a/tensorflow/python/kernel_tests/random/multinomial_op_test.py +++ b/tensorflow/python/kernel_tests/random/multinomial_op_test.py @@ -46,7 +46,7 @@ def composed_sampler(logits, num_samples): logits = array_ops.expand_dims(logits, -1) # [batch size, num samples] - return math_ops.argmax(logits + noise, dimension=1) + return math_ops.argmax(logits + noise, axis=1) native_sampler = random_ops.multinomial diff --git a/tensorflow/python/kernel_tests/random/random_ops_test.py b/tensorflow/python/kernel_tests/random/random_ops_test.py index df37dd98ec..e4b5c3832a 100644 --- a/tensorflow/python/kernel_tests/random/random_ops_test.py +++ b/tensorflow/python/kernel_tests/random/random_ops_test.py @@ -228,6 +228,17 @@ class RandomUniformTest(test.TestCase): print("count = ", count) self.assertTrue(count < count_limit) + def testUniformIntsWithInvalidShape(self): + for dtype in dtypes.int32, dtypes.int64: + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1"): + random_ops.random_uniform( + [1000], minval=[1, 2], maxval=3, dtype=dtype) + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1"): + random_ops.random_uniform( + [1000], minval=1, maxval=[2, 3], dtype=dtype) + # Check that uniform ints actually follow a uniform distribution. 
def testUniformInts(self): minv = -2 diff --git a/tensorflow/python/kernel_tests/string_strip_op_test.py b/tensorflow/python/kernel_tests/string_strip_op_test.py new file mode 100644 index 0000000000..30fd477ff4 --- /dev/null +++ b/tensorflow/python/kernel_tests/string_strip_op_test.py @@ -0,0 +1,56 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for string_strip_op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.ops import string_ops +from tensorflow.python.platform import test + + +class StringStripOpTest(test.TestCase): + """ Test cases for tf.string_strip.""" + + def test_string_strip(self): + strings = ["pigs on the wing", "animals"] + + with self.test_session() as sess: + output = string_ops.string_strip(strings) + output = sess.run(output) + self.assertAllEqual(output, [b"pigs on the wing", b"animals"]) + + def test_string_strip_2d(self): + strings = [["pigs on the wing", "animals"], + [" hello ", "\n\tworld \r \n"]] + + with self.test_session() as sess: + output = string_ops.string_strip(strings) + output = sess.run(output) + self.assertAllEqual(output, [[b"pigs on the wing", b"animals"], + [b"hello", b"world"]]) + + def test_string_strip_with_empty_strings(self): + strings = [" hello ", "", "world ", " \t \r \n 
"] + + with self.test_session() as sess: + output = string_ops.string_strip(strings) + output = sess.run(output) + self.assertAllEqual(output, [b"hello", b"", b"world", b""]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index 22317a348c..8c6bb7955a 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -126,6 +126,9 @@ Status NumericNpDTypeToTfDType(const int np, DataType* tf) { case NPY_INT8: *tf = DT_INT8; break; + case NPY_UINT16: + *tf = DT_UINT16; + break; case NPY_INT16: *tf = DT_INT16; break; diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index fa26e07c85..ceeabe090d 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -144,6 +144,7 @@ def identity(input, name=None): # pylint: disable=redefined-builtin # pylint: disable=redefined-builtin,protected-access @tf_export("expand_dims") +@deprecation.deprecated_args(None, "Use the `axis` argument instead", "dim") def expand_dims(input, axis=None, name=None, dim=None): """Inserts a dimension of 1 into a tensor's shape. @@ -193,11 +194,7 @@ def expand_dims(input, axis=None, name=None, dim=None): Raises: ValueError: if both `dim` and `axis` are specified. """ - # TODO(aselle): Remove argument dim - if dim is not None: - if axis is not None: - raise ValueError("can't specify both 'dim' and 'axis'") - axis = dim + axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) return gen_array_ops.expand_dims(input, axis, name) @@ -2581,6 +2578,8 @@ def sequence_mask(lengths, maxlen=None, dtype=dtypes.bool, name=None): @tf_export("squeeze") +@deprecation.deprecated_args(None, "Use the `axis` argument instead", + "squeeze_dims") def squeeze(input, axis=None, name=None, squeeze_dims=None): # pylint: disable=redefined-builtin """Removes dimensions of size 1 from the shape of a tensor. 
@@ -2621,10 +2620,8 @@ def squeeze(input, axis=None, name=None, squeeze_dims=None): Raises: ValueError: When both `squeeze_dims` and `axis` are specified. """ - if squeeze_dims is not None: - if axis is not None: - raise ValueError("Cannot specify both 'squeeze_dims' and 'axis'") - axis = squeeze_dims + axis = deprecation.deprecated_argument_lookup( + "axis", axis, "squeeze_dims", squeeze_dims) if np.isscalar(axis): axis = [axis] return gen_array_ops.squeeze(input, axis, name) diff --git a/tensorflow/python/ops/distributions/categorical.py b/tensorflow/python/ops/distributions/categorical.py index 66fa9e110c..8f25b1149c 100644 --- a/tensorflow/python/ops/distributions/categorical.py +++ b/tensorflow/python/ops/distributions/categorical.py @@ -311,7 +311,7 @@ class Categorical(distribution.Distribution): nn_ops.log_softmax(self.logits) * self.probs, axis=-1) def _mode(self): - ret = math_ops.argmax(self.logits, dimension=self._batch_rank) + ret = math_ops.argmax(self.logits, axis=self._batch_rank) ret = math_ops.cast(ret, self.dtype) ret.set_shape(self.batch_shape) return ret diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index f0120f2957..9e46739bc1 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -331,11 +331,11 @@ def embedding_lookup_sparse(params, representing sharded embedding tensors. Alternatively, a `PartitionedVariable`, created by partitioning along dimension 0. Each element must be appropriately sized for the given `partition_strategy`. - sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId), + sp_ids: N x M `SparseTensor` of int64 ids (typically from FeatureValueToId), where N is typically batch size and M is arbitrary. - sp_weights: either a SparseTensor of float / double weights, or None to - indicate all weights should be taken to be 1. If specified, sp_weights - must have exactly the same shape and indices as sp_ids. 
+ sp_weights: either a `SparseTensor` of float / double weights, or `None` to + indicate all weights should be taken to be 1. If specified, `sp_weights` + must have exactly the same shape and indices as `sp_ids`. partition_strategy: A string specifying the partitioning strategy, relevant if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. @@ -351,39 +351,43 @@ def embedding_lookup_sparse(params, Returns: A dense tensor representing the combined embeddings for the - sparse ids. For each row in the dense tensor represented by sp_ids, the op + sparse ids. For each row in the dense tensor represented by `sp_ids`, the op looks up the embeddings for all ids in that row, multiplies them by the corresponding weight, and combines these embeddings as specified. In other words, if - shape(combined params) = [p0, p1, ..., pm] + `shape(combined params) = [p0, p1, ..., pm]` and - shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn] + `shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]` then - shape(output) = [d0, d1, ..., dn-1, p1, ..., pm]. + `shape(output) = [d0, d1, ..., dn-1, p1, ..., pm]`. For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are + ```python [0, 0]: id 1, weight 2.0 [0, 1]: id 3, weight 0.5 [1, 0]: id 0, weight 1.0 [2, 3]: id 1, weight 3.0 + ``` with `combiner`="mean", then the output will be a 3x20 matrix where + ```python output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5) output[1, :] = (params[0, :] * 1.0) / 1.0 output[2, :] = (params[1, :] * 3.0) / 3.0 + ``` Raises: - TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither - None nor SparseTensor. - ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}. + TypeError: If `sp_ids` is not a `SparseTensor`, or if `sp_weights` is + neither `None` nor `SparseTensor`. + ValueError: If `combiner` is not one of {"mean", "sqrtn", "sum"}. 
""" if combiner is None: logging.warn("The default value of combiner will change from \"mean\" " diff --git a/tensorflow/python/ops/histogram_ops.py b/tensorflow/python/ops/histogram_ops.py index 4a1ef54fb5..ec38d89a0e 100644 --- a/tensorflow/python/ops/histogram_ops.py +++ b/tensorflow/python/ops/histogram_ops.py @@ -32,7 +32,6 @@ from tensorflow.python.ops import clip_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export -from tensorflow.python.util.tf_export import tf_export @tf_export('histogram_fixed_width_bins') diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 3369fe3c9b..601010bce9 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -269,17 +269,7 @@ def random_flip_up_down(image, seed=None): Raises: ValueError: if the shape of `image` not supported. """ - with ops.name_scope(None, 'random_flip_up_down', [image]) as scope: - image = ops.convert_to_tensor(image, name='image') - image = _Assert3DImage(image) - uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) - mirror_cond = math_ops.less(uniform_random, .5) - result = control_flow_ops.cond( - mirror_cond, - lambda: array_ops.reverse(image, [0]), - lambda: image, - name=scope) - return fix_image_flip_shape(image, result) + return _random_flip(image, 0, seed, 'random_flip_up_down') @tf_export('image.random_flip_left_right') @@ -301,14 +291,34 @@ def random_flip_left_right(image, seed=None): Raises: ValueError: if the shape of `image` not supported. """ - with ops.name_scope(None, 'random_flip_left_right', [image]) as scope: + return _random_flip(image, 1, seed, 'random_flip_left_right') + + +def _random_flip(image, flip_index, seed, scope_name): + """Randomly (50% chance) flip an image along axis `flip_index`. 
+ Args: + image: A 3-D tensor of shape `[height, width, channels]`. + flip_index: The dimension along which to flip the image. + Vertical: 0, Horizontal: 1 + seed: A Python integer. Used to create a random seed. See + @{tf.set_random_seed} + for behavior. + scope_name: Name of the scope in which the ops are added. + + Returns: + A 3-D tensor of the same type and shape as `image`. + + Raises: + ValueError: if the shape of `image` not supported. + """ + with ops.name_scope(None, scope_name, [image]) as scope: image = ops.convert_to_tensor(image, name='image') image = _Assert3DImage(image) uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) mirror_cond = math_ops.less(uniform_random, .5) result = control_flow_ops.cond( mirror_cond, - lambda: array_ops.reverse(image, [1]), + lambda: array_ops.reverse(image, [flip_index]), lambda: image, name=scope) return fix_image_flip_shape(image, result) @@ -332,16 +342,7 @@ def flip_left_right(image): Raises: ValueError: if the shape of `image` not supported. """ - with ops.name_scope(None, 'flip_left_right', [image]): - image = ops.convert_to_tensor(image, name='image') - image = _AssertAtLeast3DImage(image) - shape = image.get_shape() - if shape.ndims == 3 or shape.ndims is None: - return fix_image_flip_shape(image, array_ops.reverse(image, [1])) - elif shape.ndims == 4: - return array_ops.reverse(image, [2]) - else: - raise ValueError('\'image\' must have either 3 or 4 dimensions.') + return _flip(image, 1, 'flip_left_right') @tf_export('image.flip_up_down') @@ -362,14 +363,35 @@ def flip_up_down(image): Raises: ValueError: if the shape of `image` not supported. """ - with ops.name_scope(None, 'flip_up_down', [image]): + return _flip(image, 0, 'flip_up_down') + + +def _flip(image, flip_index, scope_name): + """Flip an image either horizontally or vertically. + + Outputs the contents of `image` flipped along the dimension `flip_index`. + + See also `reverse()`.
+ + Args: + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. + flip_index: 0 For vertical, 1 for horizontal. + + Returns: + A tensor of the same type and shape as `image`. + + Raises: + ValueError: if the shape of `image` not supported. + """ + with ops.name_scope(None, scope_name, [image]): image = ops.convert_to_tensor(image, name='image') image = _AssertAtLeast3DImage(image) shape = image.get_shape() if shape.ndims == 3 or shape.ndims is None: - return fix_image_flip_shape(image, array_ops.reverse(image, [0])) + return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index])) elif shape.ndims == 4: - return array_ops.reverse(image, [1]) + return array_ops.reverse(image, [flip_index+1]) else: raise ValueError('\'image\' must have either 3 or 4 dimensions.') diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 39b7295124..f93bf0a17f 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -39,10 +39,10 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops -from tensorflow.python.ops import linalg_ops +from tensorflow.python.ops import linalg_ops_impl +from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import random_ops from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import tf_export @@ -529,7 +529,7 @@ class Orthogonal(Initializer): # Generate a random matrix a = random_ops.random_normal(flat_shape, dtype=dtype, seed=self.seed) # Compute the qr factorization - q, r = linalg_ops.qr(a, full_matrices=False) + q, r = gen_linalg_ops.qr(a, full_matrices=False) # Make Q uniform d = array_ops.diag_part(r) q *= math_ops.sign(d) @@ -577,7 +577,7 @@ class 
ConvolutionDeltaOrthogonal(Initializer): a = random_ops.random_normal([shape[-1], shape[-1]], dtype=dtype, seed=self.seed) # Compute the qr factorization - q, r = linalg_ops.qr(a, full_matrices=False) + q, r = gen_linalg_ops.qr(a, full_matrices=False) # Make Q uniform d = array_ops.diag_part(r) q *= math_ops.sign(d) @@ -636,7 +636,7 @@ class ConvolutionOrthogonal(Initializer): a = random_ops.random_normal([n, n], dtype=self.dtype, seed=self.seed) if self.seed: self.seed += 1 - q, r = linalg_ops.qr(a) + q, r = gen_linalg_ops.qr(a) d = array_ops.diag_part(r) # make q uniform q *= math_ops.sign(d) @@ -723,7 +723,7 @@ class ConvolutionOrthogonal2D(ConvolutionOrthogonal): raise ValueError("The dimension of the matrices must be the same.") n = p1.shape.as_list()[0] kernel2x2 = {} - eye = linalg_ops.eye(n, dtype=self.dtype) + eye = linalg_ops_impl.eye(n, dtype=self.dtype) kernel2x2[0, 0] = math_ops.matmul(p1, p2) kernel2x2[0, 1] = math_ops.matmul(p1, (eye - p2)) kernel2x2[1, 0] = math_ops.matmul((eye - p1), p2) @@ -848,7 +848,7 @@ class ConvolutionOrthogonal1D(ConvolutionOrthogonal): """ n = projection_matrix.shape.as_list()[0] kernel = {} - eye = linalg_ops.eye(n, dtype=self.dtype) + eye = linalg_ops_impl.eye(n, dtype=self.dtype) kernel[0] = projection_matrix kernel[1] = eye - projection_matrix return kernel @@ -976,7 +976,7 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal): if p1_shape != p2.shape.as_list() or p1_shape != p3.shape.as_list(): raise ValueError("The dimension of the matrices must be the same.") n = p1_shape[0] - eye = linalg_ops.eye(n, dtype=self.dtype) + eye = linalg_ops_impl.eye(n, dtype=self.dtype) kernel2x2x2 = {} def matmul(p1, p2, p3): return math_ops.matmul(math_ops.matmul(p1, p2), p3) @@ -1084,7 +1084,7 @@ class Identity(Initializer): "Identity matrix initializer can only be used for 2D matrices.") if dtype is None: dtype = self.dtype - initializer = linalg_ops.eye(*full_shape, dtype=dtype) + initializer = linalg_ops_impl.eye(*full_shape, 
dtype=dtype) if partition_info is not None: initializer = array_ops.slice(initializer, partition_info.var_offset, shape) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 170861b43f..a0dfa543f9 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -24,12 +24,13 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gen_linalg_ops +from tensorflow.python.ops import linalg_ops_impl from tensorflow.python.ops import math_ops # pylint: disable=wildcard-import from tensorflow.python.ops.gen_linalg_ops import * # pylint: enable=wildcard-import -from tensorflow.python.util import compat from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import tf_export @@ -159,36 +160,11 @@ def eye(num_rows, Returns: A `Tensor` of shape `batch_shape + [num_rows, num_columns]` """ - with ops.name_scope( - name, default_name='eye', values=[num_rows, num_columns, batch_shape]): - is_square = num_columns is None - batch_shape = [] if batch_shape is None else batch_shape - num_columns = num_rows if num_columns is None else num_columns - if isinstance(num_rows, ops.Tensor) or isinstance( - num_columns, ops.Tensor) or isinstance(batch_shape, ops.Tensor): - batch_shape = ops.convert_to_tensor( - batch_shape, name='shape', dtype=dtypes.int32) - diag_size = math_ops.minimum(num_rows, num_columns) - diag_shape = array_ops.concat((batch_shape, [diag_size]), 0) - if not is_square: - shape = array_ops.concat((batch_shape, [num_rows, num_columns]), 0) - else: - if not isinstance(num_rows, compat.integral_types) or not isinstance( - num_columns, compat.integral_types): - raise TypeError( - 'num_rows and num_columns must be positive integer values.') - batch_shape = [dim for 
dim in batch_shape] - is_square = num_rows == num_columns - diag_shape = batch_shape + [np.minimum(num_rows, num_columns)] - if not is_square: - shape = batch_shape + [num_rows, num_columns] - - diag_ones = array_ops.ones(diag_shape, dtype=dtype) - if is_square: - return array_ops.matrix_diag(diag_ones) - else: - zero_matrix = array_ops.zeros(shape, dtype=dtype) - return array_ops.matrix_set_diag(zero_matrix, diag_ones) + return linalg_ops_impl.eye(num_rows, + num_columns=num_columns, + batch_shape=batch_shape, + dtype=dtype, + name=name) @tf_export('matrix_solve_ls', 'linalg.lstsq') @@ -454,7 +430,7 @@ def norm(tensor, This function can compute several different vector norms (the 1-norm, the Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and - matrix norms (Frobenius, 1-norm, and inf-norm). + matrix norms (Frobenius, 1-norm, 2-norm and inf-norm). Args: tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128` @@ -465,7 +441,7 @@ def norm(tensor, Some restrictions apply: a) The Frobenius norm `fro` is not defined for vectors, b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`, - `np.inf` are supported. + `2`, `np.inf` are supported. See the description of `axis` on how to compute norms for a batch of vectors or matrices stored in a tensor. axis: If `axis` is `None` (the default), the input is considered a vector @@ -521,8 +497,7 @@ def norm(tensor, axis[0] == axis[1]): raise ValueError( "'axis' must be None, an integer, or a tuple of 2 unique integers") - # TODO(rmlarsen): Implement matrix 2-norm using tf.svd(). 
- supported_matrix_norms = ['euclidean', 'fro', 1, np.inf] + supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf] if ord not in supported_matrix_norms: raise ValueError("'ord' must be a supported matrix norm in %s, got %s" % (supported_matrix_norms, ord)) @@ -539,12 +514,34 @@ def norm(tensor, with ops.name_scope(name, 'norm', [tensor]): tensor = ops.convert_to_tensor(tensor) + if ord in ['fro', 'euclidean', 2, 2.0]: - # TODO(rmlarsen): Move 2-norm to a separate clause once we support it for - # matrices. - result = math_ops.sqrt( - math_ops.reduce_sum( - tensor * math_ops.conj(tensor), axis, keepdims=True)) + if is_matrix_norm and ord in [2, 2.0]: + rank = array_ops.rank(tensor) + positive_axis = functional_ops.map_fn( + lambda i: control_flow_ops.cond(i >= 0, lambda: i, lambda: i + rank), + ops.convert_to_tensor(axis)) + axes = math_ops.range(rank) + perm_before = array_ops.concat( + [array_ops.setdiff1d(axes, positive_axis)[0], positive_axis], + axis=0) + perm_after = functional_ops.map_fn( + lambda i: math_ops.cast( + array_ops.squeeze( + array_ops.where(math_ops.equal(perm_before, i))), + dtype=dtypes.int32), axes) + permed = array_ops.transpose(tensor, perm=perm_before) + matrix_2_norm = array_ops.expand_dims( + math_ops.reduce_max( + math_ops.abs(gen_linalg_ops.svd(permed, compute_uv=False)[0]), + axis=-1, + keepdims=True), + axis=-1) + result = array_ops.transpose(matrix_2_norm, perm=perm_after) + else: + result = math_ops.sqrt( + math_ops.reduce_sum( + tensor * math_ops.conj(tensor), axis, keepdims=True)) else: result = math_ops.abs(tensor) if ord == 1: diff --git a/tensorflow/python/ops/linalg_ops_impl.py b/tensorflow/python/ops/linalg_ops_impl.py new file mode 100644 index 0000000000..e7c89f6ae3 --- /dev/null +++ b/tensorflow/python/ops/linalg_ops_impl.py @@ -0,0 +1,73 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Operations for linear algebra.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.util import compat + +# Names below are lower_case. +# pylint: disable=invalid-name + + +def eye(num_rows, + num_columns=None, + batch_shape=None, + dtype=dtypes.float32, + name=None): + """Construct an identity matrix, or a batch of matrices. + + See `linalg_ops.eye`. 
+ """ + with ops.name_scope( + name, default_name='eye', values=[num_rows, num_columns, batch_shape]): + is_square = num_columns is None + batch_shape = [] if batch_shape is None else batch_shape + num_columns = num_rows if num_columns is None else num_columns + if isinstance(num_rows, ops.Tensor) or isinstance( + num_columns, ops.Tensor) or isinstance(batch_shape, ops.Tensor): + batch_shape = ops.convert_to_tensor( + batch_shape, name='shape', dtype=dtypes.int32) + diag_size = math_ops.minimum(num_rows, num_columns) + diag_shape = array_ops.concat((batch_shape, [diag_size]), 0) + if not is_square: + shape = array_ops.concat((batch_shape, [num_rows, num_columns]), 0) + else: + if not isinstance(num_rows, compat.integral_types) or not isinstance( + num_columns, compat.integral_types): + raise TypeError( + 'num_rows and num_columns must be positive integer values.') + batch_shape = [dim for dim in batch_shape] + is_square = num_rows == num_columns + diag_shape = batch_shape + [np.minimum(num_rows, num_columns)] + if not is_square: + shape = batch_shape + [num_rows, num_columns] + + diag_ones = array_ops.ones(diag_shape, dtype=dtype) + if is_square: + return array_ops.matrix_diag(diag_ones) + else: + zero_matrix = array_ops.zeros(shape, dtype=dtype) + return array_ops.matrix_set_diag(zero_matrix, diag_ones) + +# pylint: enable=invalid-name,redefined-builtin diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 34ca1adc3e..9fc545c967 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -29,6 +29,7 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.ops import weights_broadcast_ops from tensorflow.python.ops.losses import util from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util.deprecation import deprecated_argument_lookup from tensorflow.python.util.tf_export import tf_export @@ -306,11 +307,8 @@ def 
cosine_distance( ValueError: If `predictions` shape doesn't match `labels` shape, or `axis`, `labels`, `predictions` or `weights` is `None`. """ - if dim is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dim'") - axis = dim - if axis is None and dim is None: + axis = deprecated_argument_lookup("axis", axis, "dim", dim) + if axis is None: raise ValueError("You must specify 'axis'.") if labels is None: raise ValueError("labels must not be None.") @@ -696,7 +694,7 @@ def softmax_cross_entropy( onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None, loss_collection=ops.GraphKeys.LOSSES, reduction=Reduction.SUM_BY_NONZERO_WEIGHTS): - """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits. + """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2. `weights` acts as a coefficient for the loss. If a scalar is provided, then the loss is simply scaled by the given value. If `weights` is a @@ -707,11 +705,16 @@ def softmax_cross_entropy( new_onehot_labels = onehot_labels * (1 - label_smoothing) + label_smoothing / num_classes + Note that `onehot_labels` and `logits` must have the same shape, + e.g. `[batch_size, num_classes]`. The shape of `weights` must be + broadcastable to loss, whose shape is decided by the shape of `logits`. + In case the shape of `logits` is `[batch_size, num_classes]`, loss is + a `Tensor` of shape `[batch_size]`. + Args: - onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels. - logits: `[batch_size, num_classes]` logits outputs of the network . - weights: Optional `Tensor` whose rank is either 0, or rank 1 and is - broadcastable to the loss which is a `Tensor` of shape `[batch_size]`. + onehot_labels: One-hot-encoded labels. + logits: Logits outputs of the network. + weights: Optional `Tensor` that is broadcastable to loss. label_smoothing: If greater than 0 then smooth the labels. 
scope: the scope for the operations performed in computing the loss. loss_collection: collection to which the loss will be added. diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 2b04866fef..2feb88cb7b 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -211,11 +211,9 @@ def argmax(input, name=None, dimension=None, output_type=dtypes.int64): - if dimension is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dimension'") - axis = dimension - elif axis is None: + axis = deprecation.deprecated_argument_lookup( + "axis", axis, "dimension", dimension) + if axis is None: axis = 0 return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type) @@ -231,11 +229,9 @@ def argmin(input, name=None, dimension=None, output_type=dtypes.int64): - if dimension is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dimension'") - axis = dimension - elif axis is None: + axis = deprecation.deprecated_argument_lookup( + "axis", axis, "dimension", dimension) + if axis is None: axis = 0 return gen_math_ops.arg_min(input, axis, name=name, output_type=output_type) @@ -761,13 +757,25 @@ def cast(x, dtype, name=None): tf.cast(x, tf.int32) # [1, 2], dtype=tf.int32 ``` + The operation supports data types (for `x` and `dtype`) of + `uint8`, `int8`, `uint16`, `int16`, `int32`, `int64`, `float16`, `float32`, + `float64`, `complex64`, `complex128`, `bfloat16`. In case of casting from + complex types (`complex64`, `complex128`) to real types, only the real part + of `x` is returned. In case of casting from real types to complex types + (`complex64`, `complex128`), the imaginary part of the returned value is set + to `0`. The handling of complex types here matches the behavior of numpy. + Args: - x: A `Tensor` or `SparseTensor`. - dtype: The destination type. + x: A `Tensor` or `SparseTensor` of numeric type. 
It could be + `uint8`, `int8`, `uint16`, `int16`, `int32`, `int64`, + `float16`, `float32`, `float64`, `complex64`, `complex128`, `bfloat16`. + dtype: The destination type. The list of supported dtypes is the same + as `x`. name: A name for the operation (optional). Returns: - A `Tensor` or `SparseTensor` with same shape as `x`. + A `Tensor` or `SparseTensor` with same shape as `x` and + same type as `dtype`. Raises: TypeError: If `x` cannot be cast to the `dtype`. @@ -1634,7 +1642,7 @@ def reduce_min(input_tensor, tensor with a single element is returned. Args: - input_tensor: The tensor to reduce. Should have numeric type. + input_tensor: The tensor to reduce. Should have real numeric type. axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. @@ -1683,7 +1691,7 @@ def reduce_max(input_tensor, tensor with a single element is returned. Args: - input_tensor: The tensor to reduce. Should have numeric type. + input_tensor: The tensor to reduce. Should have real numeric type. axis: The dimensions to reduce. If `None` (the default), reduces all dimensions. Must be in the range `[-rank(input_tensor), rank(input_tensor))`. diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index 244702d13b..1d0d9a52a1 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -98,6 +98,7 @@ See the @{$python/nn} guide. @@fixed_unigram_candidate_sampler @@compute_accidental_hits @@quantized_conv2d +@@quantized_relu @@quantized_relu_x @@quantized_max_pool @@quantized_avg_pool diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 47cc4da7f2..d0d5ed07ce 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -987,7 +987,7 @@ def _compute_sampled_logits(weights, class biases. labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`. The target classes. 
Note that this format differs from - the `labels` argument of `nn.softmax_cross_entropy_with_logits`. + the `labels` argument of `nn.softmax_cross_entropy_with_logits_v2`. inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network. num_sampled: An `int`. The number of classes to randomly sample per batch. @@ -1012,7 +1012,7 @@ def _compute_sampled_logits(weights, out_logits: `Tensor` object with shape `[batch_size, num_true + num_sampled]`, for passing to either `nn.sigmoid_cross_entropy_with_logits` (NCE) or - `nn.softmax_cross_entropy_with_logits` (sampled softmax). + `nn.softmax_cross_entropy_with_logits_v2` (sampled softmax). out_labels: A Tensor object with the same shape as `out_logits`. """ @@ -1285,7 +1285,7 @@ def sampled_softmax_loss(weights, logits = tf.matmul(inputs, tf.transpose(weights)) logits = tf.nn.bias_add(logits, biases) labels_one_hot = tf.one_hot(labels, n_classes) - loss = tf.nn.softmax_cross_entropy_with_logits( + loss = tf.nn.softmax_cross_entropy_with_logits_v2( labels=labels_one_hot, logits=logits) ``` @@ -1303,7 +1303,7 @@ def sampled_softmax_loss(weights, biases: A `Tensor` of shape `[num_classes]`. The class biases. labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`. The target classes. Note that this format differs from - the `labels` argument of `nn.softmax_cross_entropy_with_logits`. + the `labels` argument of `nn.softmax_cross_entropy_with_logits_v2`. inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network. num_sampled: An `int`. The number of classes to randomly sample per batch. 
@@ -1340,7 +1340,8 @@ def sampled_softmax_loss(weights, partition_strategy=partition_strategy, name=name, seed=seed) - sampled_losses = nn_ops.softmax_cross_entropy_with_logits( + labels = array_ops.stop_gradient(labels, name="labels_stop_gradient") + sampled_losses = nn_ops.softmax_cross_entropy_with_logits_v2( labels=labels, logits=logits) # sampled_losses is a [batch_size] tensor. return sampled_losses diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index bb454b3c3a..cd07550d2e 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1155,7 +1155,7 @@ def atrous_conv2d(value, filters, rate, padding, name=None): Returns: A `Tensor` with the same type as `value`. - Output shape with `'VALID`` padding is: + Output shape with `'VALID'` padding is: [batch, height - 2 * (filter_width - 1), width - 2 * (filter_height - 1), out_channels]. @@ -1458,10 +1458,10 @@ def conv3d_transpose( if isinstance(output_shape, (list, np.ndarray)): # output_shape's shape should be == [5] if reached this point. - if not filter.get_shape()[3].is_compatible_with(output_shape[4]): + if not filter.get_shape()[3].is_compatible_with(output_shape[axis]): raise ValueError( "output_shape does not match filter's output channels, " - "{} != {}".format(output_shape[4], + "{} != {}".format(output_shape[axis], filter.get_shape()[3])) if padding != "VALID" and padding != "SAME": @@ -1986,7 +1986,7 @@ def sparse_softmax_cross_entropy_with_logits( must provide a single specific index for the true class for each row of `logits` (each minibatch entry). For soft softmax classification with a probability distribution for each entry, see - `softmax_cross_entropy_with_logits`. + `softmax_cross_entropy_with_logits_v2`. **WARNING:** This op expects unscaled logits, since it performs a `softmax` on `logits` internally for efficiency. 
Do not call this op with the diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index 9251e9802c..86dc053c0f 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -617,9 +617,9 @@ class BasicLSTMCell(LayerRNNCell): Args: inputs: `2-D` tensor with shape `[batch_size, input_size]`. state: An `LSTMStateTuple` of state tensors, each shaped - `[batch_size, self.state_size]`, if `state_is_tuple` has been set to + `[batch_size, num_units]`, if `state_is_tuple` has been set to `True`. Otherwise, a `Tensor` shaped - `[batch_size, 2 * self.state_size]`. + `[batch_size, 2 * num_units]`. Returns: A pair containing the new hidden state, and the new state (either a diff --git a/tensorflow/python/profiler/tfprof_logger_test.py b/tensorflow/python/profiler/tfprof_logger_test.py index 141144f987..caf3869f56 100644 --- a/tensorflow/python/profiler/tfprof_logger_test.py +++ b/tensorflow/python/profiler/tfprof_logger_test.py @@ -38,7 +38,7 @@ class TFProfLoggerTest(test.TestCase): return math_ops.matmul(a, b) # pylint: disable=pointless-string-statement - """# TODO(xpan): This this out of core so it doesn't depend on contrib. + """# TODO(xpan): This out of core so it doesn't depend on contrib. 
def testFillMissingShape(self): a, b, y = self._BuildSmallPlaceholderlModel() run_options = config_pb2.RunOptions( diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index b88be4ae04..73ea85ab0c 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -41,6 +41,7 @@ from tensorflow.python.debug.wrappers import local_cli_wrapper from tensorflow.python.framework import meta_graph as meta_graph_lib from tensorflow.python.framework import ops as ops_lib from tensorflow.python.platform import app # pylint: disable=unused-import +from tensorflow.python.lib.io import file_io from tensorflow.python.saved_model import loader from tensorflow.python.tools import saved_model_utils @@ -543,7 +544,7 @@ def load_inputs_from_input_arg_string(inputs_str, input_exprs_str, input_examples = preprocess_input_examples_arg_string(input_examples_str) for input_tensor_key, (filename, variable_name) in inputs.items(): - data = np.load(filename) + data = np.load(file_io.FileIO(filename, mode='r')) # When a variable_name key is specified for the input file if variable_name: diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 3867c0d8da..70495291bc 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -2731,7 +2731,7 @@ class ScopedGraphTest(test.TestCase): # The rest of the variables. 
rest_variables = list( set(variables.global_variables()) - set(var_list.keys())) - init_rest_op = variables.initialize_variables(rest_variables) + init_rest_op = variables.variables_initializer(rest_variables) with self.test_session(graph=graph) as sess: saver = saver_module.Saver(var_list=var_list, max_to_keep=1) diff --git a/tensorflow/python/util/compat.py b/tensorflow/python/util/compat.py index 4163fcac79..3358ffe526 100644 --- a/tensorflow/python/util/compat.py +++ b/tensorflow/python/util/compat.py @@ -42,10 +42,8 @@ import six as _six from tensorflow.python.util.all_util import remove_undocumented from tensorflow.python.util.tf_export import tf_export -from tensorflow.python.util.tf_export import tf_export -@tf_export('compat.as_bytes', 'compat.as_str') def as_bytes(bytes_or_text, encoding='utf-8'): """Converts either bytes or unicode to `bytes`, using utf-8 encoding for text. @@ -68,7 +66,6 @@ def as_bytes(bytes_or_text, encoding='utf-8'): (bytes_or_text,)) -@tf_export('compat.as_text') def as_text(bytes_or_text, encoding='utf-8'): """Returns the given argument as a unicode string. @@ -93,8 +90,12 @@ def as_text(bytes_or_text, encoding='utf-8'): # Convert an object to a `str` in both Python 2 and 3. 
if _six.PY2: as_str = as_bytes + tf_export('compat.as_bytes', 'compat.as_str')(as_bytes) + tf_export('compat.as_text')(as_text) else: as_str = as_text + tf_export('compat.as_bytes')(as_bytes) + tf_export('compat.as_text', 'compat.as_str')(as_text) @tf_export('compat.as_str_any') diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 640f270323..102419a264 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -524,11 +524,12 @@ port::Status CudnnSupport::Init() { ToString(status))}; } -port::StatusOr> CudnnSupport::GetVersion() { +port::StatusOr +CudnnSupport::GetVersion() { CudnnVersion version; TF_RETURN_IF_ERROR(GetLoadedCudnnVersion(&version)); - return std::make_tuple(version.major_version, version.minor_version, - version.patch_level); + return perftools::gputools::dnn::VersionInfo( + version.major_version, version.minor_version, version.patch_level); } // Turns a BatchDescriptor structure into a cudnn tensor handle within a scope. diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index e6d12bfef9..5ded7cf154 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -45,7 +45,7 @@ class CudnnSupport : public dnn::DnnSupport { ~CudnnSupport() override; port::Status Init() override; - port::StatusOr> GetVersion() override; + port::StatusOr GetVersion() override; port::StatusOr> createRnnDescriptor( int num_layers, int hidden_size, int input_size, diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index fedf4f53b8..71cab145b9 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -37,14 +37,6 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/lib/inlined_vector.h" -#if defined(PLATFORM_WINDOWS) -// TODO: in windows ARRAYSIZE is defined in winnt.h but including it -// here creates a conflict with cuda.h - for now define it here. -#define ARRAYSIZE(a) \ - ((sizeof(a) / sizeof(*(a))) / \ - static_cast(!(sizeof(a) % sizeof(*(a))))) -#endif - bool FLAGS_gpuexec_cuda_driver_inject_init_error = false; bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false; bool FLAGS_gpuexec_cuda_device_0_only = false; @@ -719,15 +711,15 @@ CUDADriver::ContextGetSharedMemConfig(CudaContext* context) { port::bit_cast(uintptr_t(info_log_buffer_bytes)), port::bit_cast(info_log_buffer.data()), port::bit_cast(uintptr_t(log_verbose))}; - CHECK(ARRAYSIZE(options) == ARRAYSIZE(option_values)); + CHECK(TF_ARRAYSIZE(options) == TF_ARRAYSIZE(option_values)); CUresult res; { // TODO(leary) Need to see if NVIDIA can expunge the leakiness in their // module loading: see http://b/13248943 - res = cuModuleLoadDataEx(module, ptx_data, ARRAYSIZE(options), options, - option_values); + res = cuModuleLoadDataEx(module, ptx_data, TF_ARRAYSIZE(options), + options, option_values); } // The PTX JIT mutates the values in the option values array to reflect the diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 9700daca89..7c87d33d21 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -1126,7 +1126,7 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { builder.set_name(device_name); } - for (size_t i = 0; i < ARRAYSIZE(kAllUnqueryableDeviceParams); i++) { + for (size_t i = 0; i < TF_ARRAYSIZE(kAllUnqueryableDeviceParams); i++) { const auto ¶ms = kAllUnqueryableDeviceParams[i]; if (params.cc_major == cc_major_ && params.cc_minor == cc_minor_) { 
builder.set_blocks_per_core_limit(params.blocks_per_core_limit); diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 8e202d115a..39f21d8b10 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -875,6 +875,22 @@ enum class ElementwiseOperation { kAdd, kMultiply }; string ElementwiseOperationString(ElementwiseOperation op); +// A simple class representing the version of the backing library, to +// workaround the "too perfect forwarding" issue in gcc6+ compilers. +// See PR#16309 and issue #18402 for links discussing the issue. +class VersionInfo { + public: + VersionInfo(int major = 0, int minor = 0, int patch = 0) + : major_(major), minor_(minor), patch_(patch) {} + int major_version() { return major_; } + int minor_version() { return minor_; } + int patch() { return patch_; } + private: + int major_; + int minor_; + int patch_; +}; + // Suite of operations typically used for implementing Deep/Convolutional Neural // Nets. Note: A false return value of an operation indicates the // implementation is not available. @@ -885,8 +901,8 @@ class DnnSupport { virtual port::Status Init() = 0; - // Gets the version of the backing library, as a {major, minor, patch} tuple. - virtual port::StatusOr> GetVersion() { + // Gets the version of the backing library, as a VersionInfo object. 
+ virtual port::StatusOr GetVersion() { return port::UnimplementedError( "DnnSupport::GetVersion not implemented on this platform."); } diff --git a/tensorflow/stream_executor/platform/port.h b/tensorflow/stream_executor/platform/port.h index 259cf380d6..57ad965ef1 100644 --- a/tensorflow/stream_executor/platform/port.h +++ b/tensorflow/stream_executor/platform/port.h @@ -38,12 +38,6 @@ using tensorflow::uint64; using std::string; #endif -#if !defined(COMPILER_MSVC) -#define ARRAYSIZE(a) \ - ((sizeof(a) / sizeof(*(a))) / \ - static_cast(!(sizeof(a) % sizeof(*(a))))) -#endif - using tensorflow::LinkerInitialized; using tensorflow::LINKER_INITIALIZED; diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 528f811b40..51e856bed0 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -163,7 +163,6 @@ def if_override_eigen_strong_inline(a): def get_win_copts(is_external=False): WINDOWS_COPTS = [ - "/D__VERSION__=\\\"MSVC\\\"", "/DPLATFORM_WINDOWS", "/DEIGEN_HAS_C99_MATH", "/DTENSORFLOW_USE_EIGEN_THREADPOOL", @@ -1704,7 +1703,7 @@ def tf_version_info_genrule(): ], outs=["util/version_info.cc"], cmd= - "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\"", + "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\" --git_tag_override=$${GIT_TAG_OVERRIDE:-}", local=1, tools=[clean_dep("//tensorflow/tools/git:gen_git_source.py")],) diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt index 05e603efb7..c8da55d802 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt @@ -6,6 +6,10 @@ tf_class { name: "cluster_spec" mtype: "" } + member { + name: "device_fn" + mtype: "" + } member { name: "evaluation_master" mtype: "" @@ -84,7 +88,7 @@ tf_class { } member_method { name: "__init__" - argspec: 
"args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\', \'train_distribute\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'\', \'\', \'None\', \'5\', \'10000\', \'100\', \'None\'], " + argspec: "args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\', \'train_distribute\', \'device_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'\', \'\', \'None\', \'5\', \'10000\', \'100\', \'None\', \'None\'], " } member_method { name: "replace" diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index c66249999f..0b12bc060e 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1980,6 +1980,10 @@ tf_module { name: "string_split" argspec: "args=[\'source\', \'delimiter\', \'skip_empty\'], varargs=None, keywords=None, defaults=[\' \', \'True\'], " } + member_method { + name: "string_strip" + argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "string_to_hash_bucket" argspec: "args=[\'string_tensor\', \'num_buckets\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index 82042b93c0..5fa75e1d61 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -123,6 +123,10 @@ done BAZEL_FLAGS=$(str_strip "${BAZEL_FLAGS}") +if [[ -z "$GIT_TAG_OVERRIDE" ]]; then + BAZEL_FLAGS+=" --action_env=GIT_TAG_OVERRIDE" +fi + echo "Using Bazel flags: ${BAZEL_FLAGS}" 
PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package" diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh index caa3a40817..c342367bac 100755 --- a/tensorflow/tools/ci_build/builds/test_user_ops.sh +++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh @@ -213,27 +213,34 @@ USER_OP=$(echo "${USER_OP_SO}" | sed -e 's/\.so//') echo "Invoking user op ${USER_OP} defined in file ${USER_OP_SO} "\ "via pip installation" -ORIG_OUTPUT=$("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; print(tf.Session('').run(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT})))") - -# Format OUTPUT for analysis -if [[ -z $(echo "${ORIG_OUTPUT}" | grep -o ',') ]]; then - if [[ ${IS_MAC} == "1" ]]; then - OUTPUT=$(echo "${ORIG_OUTPUT}" | sed -E -e 's/[ \t]+/,/g') +function run_op() { + local ORIG_OUTPUT=$1 + local ADDITIONAL_LOG=$2 + + # Format OUTPUT for analysis + if [[ -z $(echo "${ORIG_OUTPUT}" | grep -o ',') ]]; then + if [[ ${IS_MAC} == "1" ]]; then + local OUTPUT=$(echo "${ORIG_OUTPUT}" | sed -E -e 's/[ \t]+/,/g') + else + local OUTPUT=$(echo "${ORIG_OUTPUT}" | sed -r -e 's/[ \t]+/,/g') + fi else - OUTPUT=$(echo "${ORIG_OUTPUT}" | sed -r -e 's/[ \t]+/,/g') + local OUTPUT="${ORIG_OUTPUT}" fi -else - OUTPUT="${ORIG_OUTPUT}" -fi -EQUALS_EXPECTED=$("${PYTHON_BIN_PATH}" -c "print(${OUTPUT} == ${EXPECTED_OUTPUT})") + local EQUALS_EXPECTED=$("${PYTHON_BIN_PATH}" -c "print(${OUTPUT} == ${EXPECTED_OUTPUT})") -if [[ "${EQUALS_EXPECTED}" != "True" ]]; then - die "FAILED: Output from user op (${OUTPUT}) does not match expected "\ -"output ${EXPECTED_OUTPUT}" -else - echo "Output from user op (${OUTPUT}) matches expected output" -fi + if [[ "${EQUALS_EXPECTED}" != "True" ]]; then + local ERROR="FAILED: Output from user op (${OUTPUT}) does not match expected "\ + "output ${EXPECTED_OUTPUT}"${ADDITIONAL_LOG} + die ${ERROR} + else + echo "Output from user op (${OUTPUT}) matches expected output" + fi +} + +run_op 
$("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; print(tf.Session('').run(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT})))") +run_op $("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; tf.enable_eager_execution(); print(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT}))") " in eager mode" popd diff --git a/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh b/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh index dbf376be6f..2a9f295188 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh @@ -30,7 +30,10 @@ export PYTHON_BIN_PATH=`which python2` yes "" | $PYTHON_BIN_PATH configure.py # Run bazel test command. Double test timeouts to avoid flakes. +# Setting KMP_BLOCKTIME to 0 lets OpenMP threads to sleep right after parallel execution +# in an MKL primitive. This reduces the effects of an oversubscription of OpenMP threads +# caused by executing multiple tests concurrently. bazel test --test_tag_filters=-no_oss,-oss_serial,-gpu,-benchmark-test --test_lang_filters=py -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 --build_tests_only \ - --config=mkl --config=opt --test_output=errors -- \ + --config=mkl --test_env=KMP_BLOCKTIME=0 --config=opt --test_output=errors -- \ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat index 97829892b1..3b437d3c58 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat @@ -31,6 +31,9 @@ IF DEFINED PIP_EXE (ECHO PIP_EXE is set to %PIP_EXE%) ELSE (SET PIP_EXE="C:\Prog :: Set ctest binary location. IF DEFINED CTEST_EXE (ECHO CTEST_EXE is set to %CTEST_EXE%) ELSE (SET CTEST_EXE="C:\Program Files\cmake\bin\ctest.exe") +:: Install absl-py. +%PIP_EXE% install --upgrade absl-py + :: Run the CMAKE build to build the pip package. 
CALL %REPO_ROOT%\tensorflow\tools\ci_build\windows\gpu\cmake\run_build.bat if %errorlevel% neq 0 exit /b %errorlevel% @@ -40,9 +43,6 @@ DIR %REPO_ROOT%\%BUILD_DIR%\tf_python\dist\ /S /B > wheel_filename_file set /p WHEEL_FILENAME= const char* tf_git_version() {return "%s";} -const char* tf_compiler_version() {return __VERSION__;} +const char* tf_compiler_version() { +#ifdef _MSC_VER +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + return "MSVC " TOSTRING(_MSC_FULL_VER); +#else + return __VERSION__; +#endif +} const int tf_cxx11_abi_flag() { #ifdef _GLIBCXX_USE_CXX11_ABI return _GLIBCXX_USE_CXX11_ABI; @@ -197,7 +216,7 @@ const int tf_monolithic_build() { open(filename, "w").write(contents) -def generate(arglist): +def generate(arglist, git_tag_override=None): """Generate version_info.cc as given `destination_file`. Args: @@ -217,6 +236,10 @@ def generate(arglist): `ref_symlink` is unused in this script but passed, because the build system uses that file to detect when commits happen. + git_tag_override: Override the value for the git tag. This is useful for + releases where we want to build the release before the git tag is + created. + Raises: RuntimeError: If ./configure needs to be run, RuntimeError will be raised. """ @@ -234,11 +257,11 @@ def generate(arglist): raise RuntimeError( "Run ./configure again, branch was '%s' but is now '%s'" % (old_branch, new_branch)) - git_version = get_git_version(data["path"]) + git_version = get_git_version(data["path"], git_tag_override) write_version_info(dest_file, git_version) -def raw_generate(output_file): +def raw_generate(output_file, source_dir, git_tag_override=None): """Simple generator used for cmake/make build systems. This does not create any symlinks. It requires the build system @@ -246,9 +269,13 @@ def raw_generate(output_file): Args: output_file: Output filename for the version info cc + source_dir: Base path of the source code + git_tag_override: Override the value for the git tag. 
This is useful for + releases where we want to build the release before the git tag is + created. """ - git_version = get_git_version(".") + git_version = get_git_version(source_dir, git_tag_override) write_version_info(output_file, git_version) @@ -270,6 +297,11 @@ parser.add_argument( "--gen_root_path", type=str, help="Root path to place generated git files (created by --configure).") +parser.add_argument( + "--git_tag_override", type=str, + help="Override git tag value in the __git_version__ string. Useful when " + "creating release builds before the release tag is created.") + parser.add_argument( "--generate", type=str, @@ -281,6 +313,11 @@ parser.add_argument( type=str, help="Generate version_info.cc (simpler version used for cmake/make)") +parser.add_argument( + "--source_dir", + type=str, + help="Base path of the source code (used for cmake/make)") + args = parser.parse_args() if args.configure is not None: @@ -288,9 +325,12 @@ if args.configure is not None: raise RuntimeError("Must pass --gen_root_path arg when running --configure") configure(args.configure, args.gen_root_path, debug=args.debug) elif args.generate is not None: - generate(args.generate) + generate(args.generate, args.git_tag_override) elif args.raw_generate is not None: - raw_generate(args.raw_generate) + source_path = "." 
+ if args.source_dir is not None: + source_path = args.source_dir + raw_generate(args.raw_generate, source_path, args.git_tag_override) else: raise RuntimeError("--configure or --generate or --raw_generate " "must be used") diff --git a/tensorflow/tools/git/gen_git_source.sh b/tensorflow/tools/git/gen_git_source.sh index db20bb00e8..cd128af6b3 100755 --- a/tensorflow/tools/git/gen_git_source.sh +++ b/tensorflow/tools/git/gen_git_source.sh @@ -28,7 +28,15 @@ fi cat < ${OUTPUT_FILENAME} #include const char* tf_git_version() {return "${GIT_VERSION}";} -const char* tf_compiler_version() {return __VERSION__;} +const char* tf_compiler_version() { +#ifdef _MSC_VER +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + return "MSVC " TOSTRING(_MSC_FULL_VER); +#else + return __VERSION__; +#endif +} const int tf_cxx11_abi_flag() { #ifdef _GLIBCXX_USE_CXX11_ABI return _GLIBCXX_USE_CXX11_ABI; diff --git a/tensorflow/tools/graph_transforms/transform_graph.cc b/tensorflow/tools/graph_transforms/transform_graph.cc index 28387c2b48..8ce8f5e24b 100644 --- a/tensorflow/tools/graph_transforms/transform_graph.cc +++ b/tensorflow/tools/graph_transforms/transform_graph.cc @@ -24,6 +24,9 @@ limitations under the License. 
#include "tensorflow/core/util/command_line_flags.h" #include "tensorflow/tools/graph_transforms/file_utils.h" #include "tensorflow/tools/graph_transforms/transform_utils.h" +#if !defined(PLATFORM_WINDOWS) +#include +#endif namespace tensorflow { namespace graph_transforms { @@ -130,16 +133,64 @@ Status ParseTransformParameters(const string& transforms_string, return Status::OK(); } +std::string ExpandPath(const std::string& path_string) { +#if defined(PLATFORM_WINDOWS) + return path_string; +#else + if (path_string.empty() || path_string[0] != '~') { + return path_string; + } + + const char* home = NULL; + std::string::size_type prefix = path_string.find_first_of('/'); + if (path_string.length() == 1 || prefix == 1) { + // The value of $HOME, e.g., ~/foo + home = getenv("HOME"); + if (!home) { + // If HOME is not available, get uid + struct passwd* pw = getpwuid(getuid()); + if (pw) { + home = pw->pw_dir; + } + } + } else { + // The value of ~user, e.g., ~user/foo + std::string user(path_string, 1, (prefix == std::string::npos) + ? 
std::string::npos + : prefix - 1); + struct passwd* pw = getpwnam(user.c_str()); + if (pw) { + home = pw->pw_dir; + } + } + + if (!home) { + return path_string; + } + + string path(home); + if (prefix == std::string::npos) { + return path; + } + + if (path.length() == 0 || path[path.length() - 1] != '/') { + path += '/'; + } + path += path_string.substr(prefix + 1); + return path; +#endif +} + int ParseFlagsAndTransformGraph(int argc, char* argv[], bool init_main) { - string in_graph = ""; - string out_graph = ""; + string in_graph_string = ""; + string out_graph_string = ""; string inputs_string = ""; string outputs_string = ""; string transforms_string = ""; bool output_as_text = false; std::vector flag_list = { - Flag("in_graph", &in_graph, "input graph file name"), - Flag("out_graph", &out_graph, "output graph file name"), + Flag("in_graph", &in_graph_string, "input graph file name"), + Flag("out_graph", &out_graph_string, "output graph file name"), Flag("inputs", &inputs_string, "inputs"), Flag("outputs", &outputs_string, "outputs"), Flag("transforms", &transforms_string, "list of transforms"), @@ -166,11 +217,11 @@ int ParseFlagsAndTransformGraph(int argc, char* argv[], bool init_main) { LOG(ERROR) << "Unknown argument " << argv[1] << ".\n" << usage; return -1; } - if (in_graph.empty()) { + if (in_graph_string.empty()) { LOG(ERROR) << "in_graph graph can't be empty.\n" << usage; return -1; } - if (out_graph.empty()) { + if (out_graph_string.empty()) { LOG(ERROR) << "out_graph graph can't be empty.\n" << usage; return -1; } @@ -179,6 +230,9 @@ int ParseFlagsAndTransformGraph(int argc, char* argv[], bool init_main) { return -1; } + string in_graph = ExpandPath(in_graph_string); + string out_graph = ExpandPath(out_graph_string); + std::vector inputs = str_util::Split(inputs_string, ','); std::vector outputs = str_util::Split(outputs_string, ','); TransformParameters transform_params; @@ -197,7 +251,7 @@ int ParseFlagsAndTransformGraph(int argc, char* argv[], 
bool init_main) { GraphDef graph_def; Status load_status = LoadTextOrBinaryGraphFile(in_graph, &graph_def); if (!load_status.ok()) { - LOG(ERROR) << "Loading graph '" << in_graph << "' failed with " + LOG(ERROR) << "Loading graph '" << in_graph_string << "' failed with " << load_status.error_message(); LOG(ERROR) << usage; return -1; @@ -219,7 +273,7 @@ int ParseFlagsAndTransformGraph(int argc, char* argv[], bool init_main) { save_status = WriteBinaryProto(Env::Default(), out_graph, graph_def); } if (!save_status.ok()) { - LOG(ERROR) << "Saving graph '" << out_graph << "' failed with " + LOG(ERROR) << "Saving graph '" << out_graph_string << "' failed with " << save_status.error_message(); return -1; } diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 211f93296b..f84a91d009 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -31,7 +31,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.7.0' +_VERSION = '1.8.0-rc0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index bbef4b9e5f..8b26a32eac 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -167,11 +167,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "gemmlowp", urls = [ - "https://mirror.bazel.build/github.com/google/gemmlowp/archive/7c7c744640ddc3d0af18fb245b4d23228813a71b.zip", - "https://github.com/google/gemmlowp/archive/7c7c744640ddc3d0af18fb245b4d23228813a71b.zip", + # TODO (yongtang): uncomment once mirror.bazel.build is propagated. 
+ # "https://mirror.bazel.build/github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip", + "https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip", ], - sha256 = "b852cc90259a7357c8a323f108f2cec6e85979fc3b18b5590b99e0130044b2cf", - strip_prefix = "gemmlowp-7c7c744640ddc3d0af18fb245b4d23228813a71b", + sha256 = "b87faa7294dfcc5d678f22a59d2c01ca94ea1e2a3b488c38a95a67889ed0a658", + strip_prefix = "gemmlowp-38ebac7b059e84692f53e5938f97a9943c120d98", ) tf_http_archive( diff --git a/third_party/repo.bzl b/third_party/repo.bzl index aa178fa8ca..36f5aa5bde 100644 --- a/third_party/repo.bzl +++ b/third_party/repo.bzl @@ -17,6 +17,7 @@ _SINGLE_URL_WHITELIST = depset([ "arm_compiler", "ortools_archive", + "gemmlowp", ]) def _is_windows(ctx): @@ -68,7 +69,7 @@ def _apply_delete(ctx, paths): _execute_and_check_ret_code(ctx, cmd) def _tf_http_archive(ctx): - if ("mirror.bazel.build" not in ctx.attr.urls[0] or + if ("mirror.bazel.build" not in ctx.attr.urls[0] and (len(ctx.attr.urls) < 2 and ctx.attr.name not in _SINGLE_URL_WHITELIST)): fail("tf_http_archive(urls) must have redundant URLs. The " + -- GitLab From 1bb16a262900dce73e8d757d9ad29feed0c878ad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 21:46:39 -0700 Subject: [PATCH 187/434] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 194033378 --- tensorflow/go/op/wrappers.go | 3738 +++++++++++++++++----------------- 1 file changed, 1869 insertions(+), 1869 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index c31ca8b67a..d038846c4f 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -2243,81 +2243,170 @@ func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Ou return op.Output(0) } -// Returns the complex conjugate of a complex number. +// Gather slices from `params` into a Tensor with shape specified by `indices`. 
// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// complex numbers that are the complex conjugate of each element in `input`. The -// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the -// real part and *b* is the imaginary part. +// `indices` is an K-dimensional integer tensor, best thought of as a +// (K-1)-dimensional tensor of indices into `params`, where each element defines a +// slice of `params`: // -// The complex conjugate returned by this operation is of the form \\(a - bj\\). +// output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]] // -// For example: +// Whereas in @{tf.gather} `indices` defines slices into the first +// dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the +// first `N` dimensions of `params`, where `N = indices.shape[-1]`. +// +// The last dimension of `indices` can be at most the rank of +// `params`: +// +// indices.shape[-1] <= params.rank +// +// The last dimension of `indices` corresponds to elements +// (if `indices.shape[-1] == params.rank`) or slices +// (if `indices.shape[-1] < params.rank`) along dimension `indices.shape[-1]` +// of `params`. The output tensor has shape +// +// indices.shape[:-1] + params.shape[indices.shape[-1]:] +// +// Note that on CPU, if an out of bound index is found, an error is returned. +// On GPU, if an out of bound index is found, a 0 is stored in the +// corresponding output value. +// +// Some examples below. 
+// +// Simple indexing into a matrix: // +// ```python +// indices = [[0, 0], [1, 1]] +// params = [['a', 'b'], ['c', 'd']] +// output = ['a', 'd'] // ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] +// +// Slice indexing into a matrix: +// +// ```python +// indices = [[1], [0]] +// params = [['a', 'b'], ['c', 'd']] +// output = [['c', 'd'], ['a', 'b']] // ``` -func Conj(scope *Scope, input tf.Output) (output tf.Output) { +// +// Indexing into a 3-tensor: +// +// ```python +// indices = [[1]] +// params = [[['a0', 'b0'], ['c0', 'd0']], +// [['a1', 'b1'], ['c1', 'd1']]] +// output = [[['a1', 'b1'], ['c1', 'd1']]] +// +// +// indices = [[0, 1], [1, 0]] +// params = [[['a0', 'b0'], ['c0', 'd0']], +// [['a1', 'b1'], ['c1', 'd1']]] +// output = [['c0', 'd0'], ['a1', 'b1']] +// +// +// indices = [[0, 0, 1], [1, 0, 1]] +// params = [[['a0', 'b0'], ['c0', 'd0']], +// [['a1', 'b1'], ['c1', 'd1']]] +// output = ['b0', 'b1'] +// ``` +// +// Batched indexing into a matrix: +// +// ```python +// indices = [[[0, 0]], [[0, 1]]] +// params = [['a', 'b'], ['c', 'd']] +// output = [['a'], ['b']] +// ``` +// +// Batched slice indexing into a matrix: +// +// ```python +// indices = [[[1]], [[0]]] +// params = [['a', 'b'], ['c', 'd']] +// output = [[['c', 'd']], [['a', 'b']]] +// ``` +// +// Batched indexing into a 3-tensor: +// +// ```python +// indices = [[[1]], [[0]]] +// params = [[['a0', 'b0'], ['c0', 'd0']], +// [['a1', 'b1'], ['c1', 'd1']]] +// output = [[[['a1', 'b1'], ['c1', 'd1']]], +// [[['a0', 'b0'], ['c0', 'd0']]]] +// +// indices = [[[0, 1], [1, 0]], [[0, 0], [1, 1]]] +// params = [[['a0', 'b0'], ['c0', 'd0']], +// [['a1', 'b1'], ['c1', 'd1']]] +// output = [[['c0', 'd0'], ['a1', 'b1']], +// [['a0', 'b0'], ['c1', 'd1']]] +// +// +// indices = [[[0, 0, 1], [1, 0, 1]], [[0, 1, 1], [1, 1, 0]]] +// params = [[['a0', 'b0'], ['c0', 'd0']], +// [['a1', 'b1'], ['c1', 'd1']]] +// output = [['b0', 'b1'], ['d0', 'c1']] 
+// ``` +// +// Arguments: +// params: The tensor from which to gather values. +// indices: Index tensor. +// +// Returns Values from `params` gathered from indices given by `indices`, with +// shape `indices.shape[:-1] + params.shape[indices.shape[-1]:]`. +func GatherNd(scope *Scope, params tf.Output, indices tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Conj", + Type: "GatherNd", Input: []tf.Input{ - input, + params, indices, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum. -type ResourceSparseApplyMomentumAttr func(optionalAttr) +// GatherAttr is an optional argument to Gather. +type GatherAttr func(optionalAttr) -// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr { +// GatherValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func GatherValidateIndices(value bool) GatherAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["validate_indices"] = value } } -// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. +// Gather slices from `params` according to `indices`. // -// value: If `True`, the tensor passed to compute grad will be -// var - lr * momentum * accum, so in the end, the var you get is actually -// var - lr * momentum * accum. 
-// If not specified, defaults to false -func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr { - return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update relevant entries in '*var' and '*accum' according to the momentum scheme. +// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). +// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: // -// Set use_nesterov = True if you want to use Nesterov momentum. +// ```python +// # Scalar indices +// output[:, ..., :] = params[indices, :, ... :] // -// That is for rows we have grad for, we update var and accum as follows: +// # Vector indices +// output[i, :, ..., :] = params[indices[i], :, ... :] // -// accum = accum * momentum + grad -// var -= lr * accum +// # Higher rank indices +// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] +// ``` // -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// momentum: Momentum. Must be a scalar. +// If `indices` is a permutation and `len(indices) == params.shape[0]` then +// this operation will permute `params` accordingly. // -// Returns the created operation. -func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) { +// `validate_indices`: DEPRECATED. If this operation is assigned to CPU, values in +// `indices` are always validated to be within range. If assigned to GPU, +// out-of-bound indices result in safe but unspecified behavior, which may include +// raising an error. +// +//
+// +//
+func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...GatherAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -2326,13 +2415,14 @@ func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyMomentum", + Type: "Gather", Input: []tf.Input{ - var_, accum, lr, grad, indices, momentum, + params, indices, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } // Clips tensor values to a specified min and max. @@ -4548,62 +4638,6 @@ func QuantizedBatchNormWithGlobalNormalization(scope *Scope, t tf.Output, t_min return op.Output(0), op.Output(1), op.Output(2) } -// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth. -type HistogramFixedWidthAttr func(optionalAttr) - -// HistogramFixedWidthDtype sets the optional dtype attribute to value. -// If not specified, defaults to DT_INT32 -func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Return histogram of values. -// -// Given the tensor `values`, this operation returns a rank 1 histogram counting -// the number of entries in `values` that fall into every bin. The bins are -// equal width and determined by the arguments `value_range` and `nbins`. -// -// ```python -// # Bins will be: (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf) -// nbins = 5 -// value_range = [0.0, 5.0] -// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15] -// -// with tf.get_default_session() as sess: -// hist = tf.histogram_fixed_width(new_values, value_range, nbins=5) -// variables.global_variables_initializer().run() -// sess.run(hist) => [2, 1, 1, 0, 2] -// ``` -// -// Arguments: -// values: Numeric `Tensor`. -// value_range: Shape [2] `Tensor` of same `dtype` as `values`. -// values <= value_range[0] will be mapped to hist[0], -// values >= value_range[1] will be mapped to hist[-1]. 
-// nbins: Scalar `int32 Tensor`. Number of histogram bins. -// -// Returns A 1-D `Tensor` holding histogram of values. -func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "HistogramFixedWidth", - Input: []tf.Input{ - values, value_range, nbins, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Adds Tensor 'bias' to Tensor 'input' for Quantized types. // // Broadcasts the values of bias on dimensions 0..N-2 of 'input'. @@ -7020,38 +7054,107 @@ func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_ke return sparse_indices, sparse_values, sparse_shapes, dense_values } -// Real-valued fast Fourier transform. -// -// Computes the 1-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most dimension of `input`. +// DecodeRawAttr is an optional argument to DecodeRaw. +type DecodeRawAttr func(optionalAttr) + +// DecodeRawLittleEndian sets the optional little_endian attribute to value. // -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the -// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, -// followed by the `fft_length / 2` positive-frequency terms. -// -// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// value: Whether the input `bytes` are in little-endian order. +// Ignored for `out_type` values that are stored in a single byte like +// `uint8`. 
+// If not specified, defaults to true +func DecodeRawLittleEndian(value bool) DecodeRawAttr { + return func(m optionalAttr) { + m["little_endian"] = value + } +} + +// Reinterpret the bytes of a string as a vector of numbers. // // Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. +// bytes: All the elements must have the same length. // -// Returns A complex64 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length / 2 + 1` unique -// frequency components of its 1D Fourier transform. // -// @compatibility(numpy) -// Equivalent to np.fft.rfft -// @end_compatibility -func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// Returns A Tensor with one more dimension than the input `bytes`. The +// added dimension will have size equal to the length of the elements +// of `bytes` divided by the number of bytes to represent `out_type`. +func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"out_type": out_type} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "RFFT", + Type: "DecodeRaw", Input: []tf.Input{ - input, fft_length, + bytes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Copy a tensor setting everything outside a central band in each innermost matrix +// +// to zero. +// +// The `band` part is computed as follows: +// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a +// tensor with the same shape where +// +// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`. +// +// The indicator function +// +// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && +// (num_upper < 0 || (n-m) <= num_upper)`. 
+// +// For example: +// +// ``` +// # if 'input' is [[ 0, 1, 2, 3] +// [-1, 0, 1, 2] +// [-2, -1, 0, 1] +// [-3, -2, -1, 0]], +// +// tf.matrix_band_part(input, 1, -1) ==> [[ 0, 1, 2, 3] +// [-1, 0, 1, 2] +// [ 0, -1, 0, 1] +// [ 0, 0, -1, 0]], +// +// tf.matrix_band_part(input, 2, 1) ==> [[ 0, 1, 0, 0] +// [-1, 0, 1, 0] +// [-2, -1, 0, 1] +// [ 0, -2, -1, 0]] +// ``` +// +// Useful special cases: +// +// ``` +// tf.matrix_band_part(input, 0, -1) ==> Upper triangular part. +// tf.matrix_band_part(input, -1, 0) ==> Lower triangular part. +// tf.matrix_band_part(input, 0, 0) ==> Diagonal. +// ``` +// +// Arguments: +// input: Rank `k` tensor. +// num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire +// lower triangle. +// num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep +// entire upper triangle. +// +// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor. +func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "MatrixBandPart", + Input: []tf.Input{ + input, num_lower, num_upper, }, } op := scope.AddOperation(opspec) @@ -8207,63 +8310,6 @@ func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min return op.Output(0), op.Output(1), op.Output(2) } -// GatherAttr is an optional argument to Gather. -type GatherAttr func(optionalAttr) - -// GatherValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func GatherValidateIndices(value bool) GatherAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Gather slices from `params` according to `indices`. -// -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). 
-// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: -// -// ```python -// # Scalar indices -// output[:, ..., :] = params[indices, :, ... :] -// -// # Vector indices -// output[i, :, ..., :] = params[indices[i], :, ... :] -// -// # Higher rank indices -// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] -// ``` -// -// If `indices` is a permutation and `len(indices) == params.shape[0]` then -// this operation will permute `params` accordingly. -// -// `validate_indices`: DEPRECATED. If this operation is assigned to CPU, values in -// `indices` are always validated to be within range. If assigned to GPU, -// out-of-bound indices result in safe but unspecified behavior, which may include -// raising an error. -// -//
-// -//
-func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...GatherAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Gather", - Input: []tf.Input{ - params, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Returns the truth value of (x != y) element-wise. // // *NOTE*: `NotEqual` supports broadcasting. More about broadcasting @@ -8386,6 +8432,98 @@ func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional .. return op.Output(0), op.Output(1), op.Output(2) } +// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum. +type ResourceSparseApplyMomentumAttr func(optionalAttr) + +// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. +// +// value: If `True`, the tensor passed to compute grad will be +// var - lr * momentum * accum, so in the end, the var you get is actually +// var - lr * momentum * accum. +// If not specified, defaults to false +func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr { + return func(m optionalAttr) { + m["use_nesterov"] = value + } +} + +// Update relevant entries in '*var' and '*accum' according to the momentum scheme. +// +// Set use_nesterov = True if you want to use Nesterov momentum. 
+// +// That is for rows we have grad for, we update var and accum as follows: +// +// accum = accum * momentum + grad +// var -= lr * accum +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Learning rate. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// momentum: Momentum. Must be a scalar. +// +// Returns the created operation. +func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceSparseApplyMomentum", + Input: []tf.Input{ + var_, accum, lr, grad, indices, momentum, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Returns the complex conjugate of a complex number. +// +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// complex numbers that are the complex conjugate of each element in `input`. The +// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the +// real part and *b* is the imaginary part. +// +// The complex conjugate returned by this operation is of the form \\(a - bj\\). +// +// For example: +// +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] +// ``` +func Conj(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Conj", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // ResizeBilinearAttr is an optional argument to ResizeBilinear. 
type ResizeBilinearAttr func(optionalAttr) @@ -9799,167 +9937,104 @@ func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, o return op.Output(0) } -// Inverse fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform over the -// inner-most dimension of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its inverse 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifft -// @end_compatibility -func IFFT(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IFFT", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Generates values in an interval. -// -// A sequence of `num` evenly-spaced values are generated beginning at `start`. -// If `num > 1`, the values in the sequence increase by `stop - start / num - 1`, -// so that the last one is exactly `stop`. -// -// For example: -// -// ``` -// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] -// ``` -// -// Arguments: -// start: First entry in the range. -// stop: Last entry in the range. -// num: Number of values to generate. -// -// Returns 1-D. The generated values. -func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LinSpace", - Input: []tf.Input{ - start, stop, num, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DestroyResourceOpAttr is an optional argument to DestroyResourceOp. -type DestroyResourceOpAttr func(optionalAttr) +// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg. 
+type DecodeAndCropJpegAttr func(optionalAttr) -// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value. +// DecodeAndCropJpegChannels sets the optional channels attribute to value. // -// value: whether to ignore the error when the resource -// doesn't exist. -// If not specified, defaults to true -func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr { +// value: Number of color channels for the decoded image. +// If not specified, defaults to 0 +func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr { return func(m optionalAttr) { - m["ignore_lookup_error"] = value + m["channels"] = value } } -// Deletes the resource specified by the handle. -// -// All subsequent operations using the resource will result in a NotFound -// error status. -// -// Arguments: -// resource: handle to the resource to delete. +// DecodeAndCropJpegRatio sets the optional ratio attribute to value. // -// Returns the created operation. -func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DestroyResourceOp", - Input: []tf.Input{ - resource, - }, - Attrs: attrs, +// value: Downscaling ratio. +// If not specified, defaults to 1 +func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["ratio"] = value } - return scope.AddOperation(opspec) } -// LRNAttr is an optional argument to LRN. -type LRNAttr func(optionalAttr) - -// LRNDepthRadius sets the optional depth_radius attribute to value. +// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. // -// value: 0-D. Half-width of the 1-D normalization window. 
-// If not specified, defaults to 5 -func LRNDepthRadius(value int64) LRNAttr { +// value: If true use a slower but nicer upscaling of the +// chroma planes (yuv420/422 only). +// If not specified, defaults to true +func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr { return func(m optionalAttr) { - m["depth_radius"] = value + m["fancy_upscaling"] = value } } -// LRNBias sets the optional bias attribute to value. +// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. // -// value: An offset (usually positive to avoid dividing by 0). -// If not specified, defaults to 1 -func LRNBias(value float32) LRNAttr { +// value: If true try to recover an image from truncated input. +// If not specified, defaults to false +func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr { return func(m optionalAttr) { - m["bias"] = value + m["try_recover_truncated"] = value } } -// LRNAlpha sets the optional alpha attribute to value. +// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. // -// value: A scale factor, usually positive. +// value: The minimum required fraction of lines before a truncated +// input is accepted. // If not specified, defaults to 1 -func LRNAlpha(value float32) LRNAttr { +func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr { return func(m optionalAttr) { - m["alpha"] = value + m["acceptable_fraction"] = value } } -// LRNBeta sets the optional beta attribute to value. +// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value. // -// value: An exponent. -// If not specified, defaults to 0.5 -func LRNBeta(value float32) LRNAttr { +// value: string specifying a hint about the algorithm used for +// decompression. Defaults to "" which maps to a system-specific +// default. Currently valid values are ["INTEGER_FAST", +// "INTEGER_ACCURATE"]. 
The hint may be ignored (e.g., the internal +// jpeg library changes to a version that does not have that specific +// option.) +// If not specified, defaults to "" +func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr { return func(m optionalAttr) { - m["beta"] = value + m["dct_method"] = value } } -// Local Response Normalization. +// Decode and Crop a JPEG-encoded image to a uint8 tensor. // -// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last -// dimension), and each vector is normalized independently. Within a given vector, -// each component is divided by the weighted, squared sum of inputs within -// `depth_radius`. In detail, +// The attr `channels` indicates the desired number of color channels for the +// decoded image. // -// sqr_sum[a, b, c, d] = -// sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) -// output = input / (bias + alpha * sqr_sum) ** beta +// Accepted values are: // -// For details, see [Krizhevsky et al., ImageNet classification with deep -// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). +// * 0: Use the number of channels in the JPEG-encoded image. +// * 1: output a grayscale image. +// * 3: output an RGB image. +// +// If needed, the JPEG-encoded image is transformed to match the requested number +// of color channels. +// +// The attr `ratio` allows downscaling the image by an integer factor during +// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than +// downscaling the image later. +// +// +// It is equivalent to a combination of decode and crop, but much faster by only +// decoding partial jpeg image. // // Arguments: -// input: 4-D. -func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) { +// contents: 0-D. The JPEG-encoded image. +// crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. 
+// +// Returns 3-D with shape `[height, width, channels]`.. +func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) { if scope.Err() != nil { return } @@ -9968,9 +10043,9 @@ func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) a(attrs) } opspec := tf.OpSpec{ - Type: "LRN", + Type: "DecodeAndCropJpeg", Input: []tf.Input{ - input, + contents, crop_window, }, Attrs: attrs, } @@ -9978,249 +10053,273 @@ func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) return op.Output(0) } -// Creates a dataset that zips together `input_datasets`. -func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ZipDataset", - Input: []tf.Input{ - tf.OutputList(input_datasets), - }, - Attrs: attrs, +// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. +type AllCandidateSamplerAttr func(optionalAttr) + +// AllCandidateSamplerSeed sets the optional seed attribute to value. +// +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. -type ResourceSparseApplyAdagradAttr func(optionalAttr) - -// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value. +// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value. 
// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr { +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["seed2"] = value } } -// Update relevant entries in '*var' and '*accum' according to the adagrad scheme. +// Generates labels for candidate sampling with a learned unigram distribution. // -// That is for rows we have grad for, we update var and accum as follows: -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. +// +// For each batch, this op picks a single set of sampled candidate labels. +// +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to produce. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. 
// -// Returns the created operation. -func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) { +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. +func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdagrad", + Type: "AllCandidateSampler", Input: []tf.Input{ - var_, accum, lr, grad, indices, + true_classes, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// 2D real-valued fast Fourier transform. +// Adds two `SparseTensor` objects to produce another `SparseTensor`. // -// Computes the 2-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most 2 dimensions of `input`. +// The input `SparseTensor` objects' indices are assumed ordered in standard +// lexicographic order. If this is not the case, before this step run +// `SparseReorder` to restore index ordering. 
// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the -// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension -// of `output`: the zero-frequency term, followed by the `fft_length / 2` -// positive-frequency terms. +// By default, if two values sum to zero at some index, the output `SparseTensor` +// would still include that particular location in its index, storing a zero in the +// corresponding value slot. To override this, callers can specify `thresh`, +// indicating that if the sum has a magnitude strictly smaller than `thresh`, its +// corresponding value and index would then not be included. In particular, +// `thresh == 0` (default) means everything is kept and actual thresholding happens +// only for a positive value. // -// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// In the following shapes, `nnz` is the count after taking `thresh` into account. // // Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. -// -// Returns A complex64 tensor of the same rank as `input`. The inner-most 2 -// dimensions of `input` are replaced with their 2D Fourier transform. The -// inner-most dimension contains `fft_length / 2 + 1` unique frequency -// components. -// -// @compatibility(numpy) -// Equivalent to np.fft.rfft2 -// @end_compatibility -func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// a_indices: 2-D. The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. +// a_values: 1-D. The `values` of the first `SparseTensor`, size `[nnz]` Vector. +// a_shape: 1-D. The `shape` of the first `SparseTensor`, size `[ndims]` Vector. +// b_indices: 2-D. The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix. +// b_values: 1-D. 
The `values` of the second `SparseTensor`, size `[nnz]` Vector. +// b_shape: 1-D. The `shape` of the second `SparseTensor`, size `[ndims]` Vector. +// thresh: 0-D. The magnitude threshold that determines if an output value/index +// pair takes space. +func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "RFFT2D", + Type: "SparseAdd", Input: []tf.Input{ - input, fft_length, + a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// ResizeAreaAttr is an optional argument to ResizeArea. -type ResizeAreaAttr func(optionalAttr) +// OrderedMapPeekAttr is an optional argument to OrderedMapPeek. +type OrderedMapPeekAttr func(optionalAttr) -// ResizeAreaAlignCorners sets the optional align_corners attribute to value. +// OrderedMapPeekCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func ResizeAreaAlignCorners(value bool) ResizeAreaAttr { +// REQUIRES: value >= 0 +func OrderedMapPeekCapacity(value int64) OrderedMapPeekAttr { return func(m optionalAttr) { - m["align_corners"] = value + m["capacity"] = value } } -// Resize `images` to `size` using area interpolation. -// -// Input images can be of different types but output images are always float. -// -// The range of pixel values for the output image might be slightly different -// from the range for the input image because of limited numerical precision. 
-// To guarantee an output range, for example `[0.0, 1.0]`, apply -// `tf.clip_by_value` to the output. -// -// Each output pixel is computed by first transforming the pixel's footprint into -// the input tensor and then averaging the pixels that intersect the footprint. An -// input pixel's contribution to the average is weighted by the fraction of its -// area that intersects the footprint. This is the same as OpenCV's INTER_AREA. +// OrderedMapPeekMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. +// REQUIRES: value >= 0 +func OrderedMapPeekMemoryLimit(value int64) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapPeekContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func OrderedMapPeekContainer(value string) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapPeekSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func OrderedMapPeekSharedName(value string) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op peeks at the values at the specified key. If the // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) { +// underlying container does not contain this key +// this op will block until it does. This Op is optimized for +// performance. 
+func OrderedMapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapPeekAttr) (values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResizeArea", + Type: "OrderedMapPeek", Input: []tf.Input{ - images, size, + key, indices, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("OrderedMapPeek", err) + return + } + return values } -// Pads a tensor with zeros. -// -// This operation pads a `input` with zeros according to the `paddings` you -// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the -// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many zeros to add before the contents of `input` in that dimension, and -// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` -// in that dimension. +// Inverse fast Fourier transform. // -// The padded size of each dimension D of the output is: +// Computes the inverse 1-dimensional discrete Fourier transform over the +// inner-most dimension of `input`. // -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` +// Arguments: +// input: A complex64 tensor. // -// For example: +// Returns A complex64 tensor of the same shape as `input`. The inner-most +// dimension of `input` is replaced with its inverse 1D Fourier transform. 
// -// ``` -// # 't' is [[1, 1], [2, 2]] -// # 'paddings' is [[1, 1], [2, 2]] -// # rank of 't' is 2 -// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] -// [0, 0, 1, 1, 0, 0] -// [0, 0, 2, 2, 0, 0] -// [0, 0, 0, 0, 0, 0]] -// ``` -func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { +// @compatibility(numpy) +// Equivalent to np.fft.ifft +// @end_compatibility +func IFFT(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Pad", + Type: "IFFT", Input: []tf.Input{ - input, paddings, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Checks whether a resource handle-based variable has been initialized. +// Generates values in an interval. +// +// A sequence of `num` evenly-spaced values are generated beginning at `start`. +// If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`, +// so that the last one is exactly `stop`. +// +// For example: +// +// ``` +// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] +// ``` // // Arguments: -// resource: the input resource handle. +// start: First entry in the range. +// stop: Last entry in the range. +// num: Number of values to generate. // -// Returns a scalar boolean which is true if the variable has been -// initialized. -func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) { +// Returns 1-D. The generated values. +func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "VarIsInitializedOp", + Type: "LinSpace", Input: []tf.Input{ - resource, + start, stop, num, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform. -type StatelessRandomUniformAttr func(optionalAttr) +// DestroyResourceOpAttr is an optional argument to DestroyResourceOp.
+type DestroyResourceOpAttr func(optionalAttr) -// StatelessRandomUniformDtype sets the optional dtype attribute to value. +// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value. // -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr { +// value: whether to ignore the error when the resource +// doesn't exist. +// If not specified, defaults to true +func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr { return func(m optionalAttr) { - m["dtype"] = value + m["ignore_lookup_error"] = value } } -// Outputs deterministic pseudorandom random values from a uniform distribution. -// -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// Deletes the resource specified by the handle. // -// The outputs are a deterministic function of `shape` and `seed`. +// All subsequent operations using the resource will result in a NotFound +// error status. // // Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// resource: handle to the resource to delete. // -// Returns Random values with specified shape. -func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) { +// Returns the created operation. 
+func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -10229,23 +10328,437 @@ func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optio a(attrs) } opspec := tf.OpSpec{ - Type: "StatelessRandomUniform", + Type: "DestroyResourceOp", Input: []tf.Input{ - shape, seed, + resource, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Makes its input available to the next iteration. -// -// Arguments: -// data: The tensor to be made available to the next iteration. +// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp. +type ResourceSparseApplyRMSPropAttr func(optionalAttr) + +// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value. // -// Returns The same tensor as `data`. -func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { +// value: If `True`, updating of the var, ms, and mom tensors is protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the RMSProp algorithm. +// +// Note that in dense implementation of this algorithm, ms and mom will +// update even if the grad is zero, but in this sparse implementation, ms +// and mom will not update in iterations during which the grad is zero. +// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +// +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom +// +// Arguments: +// var_: Should be from a Variable(). 
+// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// rho: Decay rate. Must be a scalar. +// +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var, ms and mom. +// +// Returns the created operation. +func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceSparseApplyRMSProp", + Input: []tf.Input{ + var_, ms, mom, lr, rho, momentum, epsilon, grad, indices, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Returns the truth value of (x > y) element-wise. +// +// *NOTE*: `Greater` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Greater", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox. +type SampleDistortedBoundingBoxAttr func(optionalAttr) + +// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to non-zero, the random number +// generator is seeded by the given `seed`. Otherwise, it is seeded by a random +// seed. 
+// If not specified, defaults to 0 +func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value. +// +// value: The cropped area of the image must contain at least this +// fraction of any bounding box supplied. The value of this parameter should be +// non-negative. In the case of 0, the cropped area does not need to overlap +// any of the bounding boxes supplied. +// If not specified, defaults to 0.1 +func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["min_object_covered"] = value + } +} + +// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value. +// +// value: The cropped area of the image must have an aspect ratio = +// width / height within this range. +// If not specified, defaults to +func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["aspect_ratio_range"] = value + } +} + +// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. +// +// value: The cropped area of the image must contain a fraction of the +// supplied image within this range. +// If not specified, defaults to +func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["area_range"] = value + } +} + +// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value.
+// +// value: Number of attempts at generating a cropped region of the image +// of the specified constraints. After `max_attempts` failures, return the entire +// image. +// If not specified, defaults to 100 +func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["max_attempts"] = value + } +} + +// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. +// +// value: Controls behavior if no bounding boxes supplied. +// If true, assume an implicit bounding box covering the whole input. If false, +// raise an error. +// If not specified, defaults to false +func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["use_image_if_no_bounding_boxes"] = value + } +} + +// Generate a single randomly distorted bounding box for an image. +// +// Bounding box annotations are often supplied in addition to ground-truth labels +// in image recognition or object localization tasks. A common technique for +// training such a system is to randomly distort an image while preserving +// its content, i.e. *data augmentation*. This Op outputs a randomly distorted +// localization of an object, i.e. bounding box, given an `image_size`, +// `bounding_boxes` and a series of constraints. +// +// The output of this Op is a single bounding box that may be used to crop the +// original image. The output is returned as 3 tensors: `begin`, `size` and +// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the +// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize +// what the bounding box looks like. +// +// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The +// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and +// height of the underlying image. 
+// +// For example, +// +// ```python +// # Generate a single distorted bounding box. +// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( +// tf.shape(image), +// bounding_boxes=bounding_boxes) +// +// # Draw the bounding box in an image summary. +// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), +// bbox_for_draw) +// tf.summary.image('images_with_box', image_with_box) +// +// # Employ the bounding box to distort the image. +// distorted_image = tf.slice(image, begin, size) +// ``` +// +// Note that if no bounding box information is available, setting +// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit +// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is +// false and no bounding boxes are supplied, an error is raised. +// +// Arguments: +// image_size: 1-D, containing `[height, width, channels]`. +// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes +// associated with the image. +// +// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to +// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to +// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box. +// Provide as input to `tf.image.draw_bounding_boxes`. +func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SampleDistortedBoundingBox", + Input: []tf.Input{ + image_size, bounding_boxes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// LRNAttr is an optional argument to LRN. 
+type LRNAttr func(optionalAttr) + +// LRNDepthRadius sets the optional depth_radius attribute to value. +// +// value: 0-D. Half-width of the 1-D normalization window. +// If not specified, defaults to 5 +func LRNDepthRadius(value int64) LRNAttr { + return func(m optionalAttr) { + m["depth_radius"] = value + } +} + +// LRNBias sets the optional bias attribute to value. +// +// value: An offset (usually positive to avoid dividing by 0). +// If not specified, defaults to 1 +func LRNBias(value float32) LRNAttr { + return func(m optionalAttr) { + m["bias"] = value + } +} + +// LRNAlpha sets the optional alpha attribute to value. +// +// value: A scale factor, usually positive. +// If not specified, defaults to 1 +func LRNAlpha(value float32) LRNAttr { + return func(m optionalAttr) { + m["alpha"] = value + } +} + +// LRNBeta sets the optional beta attribute to value. +// +// value: An exponent. +// If not specified, defaults to 0.5 +func LRNBeta(value float32) LRNAttr { + return func(m optionalAttr) { + m["beta"] = value + } +} + +// Local Response Normalization. +// +// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last +// dimension), and each vector is normalized independently. Within a given vector, +// each component is divided by the weighted, squared sum of inputs within +// `depth_radius`. In detail, +// +// sqr_sum[a, b, c, d] = +// sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) +// output = input / (bias + alpha * sqr_sum) ** beta +// +// For details, see [Krizhevsky et al., ImageNet classification with deep +// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). +// +// Arguments: +// input: 4-D. 
+func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LRN", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that zips together `input_datasets`. +func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "ZipDataset", + Input: []tf.Input{ + tf.OutputList(input_datasets), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. +type ResourceSparseApplyAdagradAttr func(optionalAttr) + +// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update relevant entries in '*var' and '*accum' according to the adagrad scheme. +// +// That is for rows we have grad for, we update var and accum as follows: +// accum += grad * grad +// var -= lr * grad * (1 / sqrt(accum)) +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Learning rate. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// +// Returns the created operation. 
+func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceSparseApplyAdagrad", + Input: []tf.Input{ + var_, accum, lr, grad, indices, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform. +type StatelessRandomUniformAttr func(optionalAttr) + +// StatelessRandomUniformDtype sets the optional dtype attribute to value. +// +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs deterministic pseudorandom random values from a uniform distribution. +// +// The generated values follow a uniform distribution in the range `[0, 1)`. The +// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// +// The outputs are a deterministic function of `shape` and `seed`. +// +// Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// +// Returns Random values with specified shape. +func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatelessRandomUniform", + Input: []tf.Input{ + shape, seed, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Makes its input available to the next iteration. +// +// Arguments: +// data: The tensor to be made available to the next iteration. 
+// +// Returns The same tensor as `data`. +func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { if scope.Err() != nil { return } @@ -10804,47 +11317,42 @@ func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output return op.Output(0) } -// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp. -type ResourceSparseApplyRMSPropAttr func(optionalAttr) +// ResizeAreaAttr is an optional argument to ResizeArea. +type ResizeAreaAttr func(optionalAttr) -// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value. +// ResizeAreaAlignCorners sets the optional align_corners attribute to value. // -// value: If `True`, updating of the var, ms, and mom tensors is protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. // If not specified, defaults to false -func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr { +func ResizeAreaAlignCorners(value bool) ResizeAreaAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["align_corners"] = value } } -// Update '*var' according to the RMSProp algorithm. +// Resize `images` to `size` using area interpolation. // -// Note that in dense implementation of this algorithm, ms and mom will -// update even if the grad is zero, but in this sparse implementation, ms -// and mom will not update in iterations during which the grad is zero. +// Input images can be of different types but output images are always float. 
// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +// The range of pixel values for the output image might be slightly different +// from the range for the input image because of limited numerical precision. +// To guarantee an output range, for example `[0.0, 1.0]`, apply +// `tf.clip_by_value` to the output. // -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom +// Each output pixel is computed by first transforming the pixel's footprint into +// the input tensor and then averaging the pixels that intersect the footprint. An +// input pixel's contribution to the average is weighted by the fraction of its +// area that intersects the footprint. This is the same as OpenCV's INTER_AREA. // // Arguments: -// var_: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var, ms and mom. +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. // -// Returns the created operation. -func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) { +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. 
+func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) { if scope.Err() != nil { return } @@ -10853,184 +11361,113 @@ func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyRMSProp", + Type: "ResizeArea", Input: []tf.Input{ - var_, ms, mom, lr, rho, momentum, epsilon, grad, indices, + images, size, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Returns the truth value of (x > y) element-wise. +// 2D real-valued fast Fourier transform. // -// *NOTE*: `Greater` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Computes the 2-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most 2 dimensions of `input`. +// +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the +// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension +// of `output`: the zero-frequency term, followed by the `fft_length / 2` +// positive-frequency terms. +// +// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. +// +// Arguments: +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. +// +// Returns A complex64 tensor of the same rank as `input`. The inner-most 2 +// dimensions of `input` are replaced with their 2D Fourier transform. The +// inner-most dimension contains `fft_length / 2 + 1` unique frequency +// components. 
+// +// @compatibility(numpy) +// Equivalent to np.fft.rfft2 +// @end_compatibility +func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Greater", + Type: "RFFT2D", Input: []tf.Input{ - x, y, + input, fft_length, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox. -type SampleDistortedBoundingBoxAttr func(optionalAttr) - -// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to non-zero, the random number -// generator is seeded by the given `seed`. Otherwise, it is seeded by a random -// seed. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value. +// Pads a tensor with zeros. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value. +// This operation pads a `input` with zeros according to the `paddings` you +// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the +// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +// how many zeros to add before the contents of `input` in that dimension, and +// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` +// in that dimension. // -// value: The cropped area of the image must contain at least this -// fraction of any bounding box supplied. The value of this parameter should be -// non-negative. 
In the case of 0, the cropped area does not need to overlap -// any of the bounding boxes supplied. -// If not specified, defaults to 0.1 -func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["min_object_covered"] = value - } -} - -// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value. +// The padded size of each dimension D of the output is: // -// value: The cropped area of the image must have an aspect ratio = -// width / height within this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["aspect_ratio_range"] = value - } -} - -// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. +// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` // -// value: The cropped area of the image must contain a fraction of the -// supplied image within in this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["area_range"] = value - } -} - -// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value. +// For example: // -// value: Number of attempts at generating a cropped region of the image -// of the specified constraints. After `max_attempts` failures, return the entire -// image. 
-// If not specified, defaults to 100 -func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["max_attempts"] = value +// ``` +// # 't' is [[1, 1], [2, 2]] +// # 'paddings' is [[1, 1], [2, 2]] +// # rank of 't' is 2 +// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] +// [0, 0, 1, 1, 0, 0] +// [0, 0, 2, 2, 0, 0] +// [0, 0, 0, 0, 0, 0]] +// ``` +func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { + if scope.Err() != nil { + return } -} - -// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. -// -// value: Controls behavior if no bounding boxes supplied. -// If true, assume an implicit bounding box covering the whole input. If false, -// raise an error. -// If not specified, defaults to false -func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["use_image_if_no_bounding_boxes"] = value + opspec := tf.OpSpec{ + Type: "Pad", + Input: []tf.Input{ + input, paddings, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Generate a single randomly distorted bounding box for an image. -// -// Bounding box annotations are often supplied in addition to ground-truth labels -// in image recognition or object localization tasks. A common technique for -// training such a system is to randomly distort an image while preserving -// its content, i.e. *data augmentation*. This Op outputs a randomly distorted -// localization of an object, i.e. bounding box, given an `image_size`, -// `bounding_boxes` and a series of constraints. -// -// The output of this Op is a single bounding box that may be used to crop the -// original image. The output is returned as 3 tensors: `begin`, `size` and -// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the -// image. 
The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize -// what the bounding box looks like. -// -// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. -// -// For example, -// -// ```python -// # Generate a single distorted bounding box. -// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( -// tf.shape(image), -// bounding_boxes=bounding_boxes) -// -// # Draw the bounding box in an image summary. -// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), -// bbox_for_draw) -// tf.summary.image('images_with_box', image_with_box) -// -// # Employ the bounding box to distort the image. -// distorted_image = tf.slice(image, begin, size) -// ``` -// -// Note that if no bounding box information is available, setting -// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit -// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is -// false and no bounding boxes are supplied, an error is raised. +// Checks whether a resource handle-based variable has been initialized. // // Arguments: -// image_size: 1-D, containing `[height, width, channels]`. -// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes -// associated with the image. +// resource: the input resource handle. // -// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to -// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to -// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box. -// Provide as input to `tf.image.draw_bounding_boxes`. 
-func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) { +// Returns a scalar boolean which is true if the variable has been +// initialized. +func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "SampleDistortedBoundingBox", + Type: "VarIsInitializedOp", Input: []tf.Input{ - image_size, bounding_boxes, + resource, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } // Converts each string in the input Tensor to its hash mod by a number of buckets. @@ -13698,6 +14135,44 @@ func InitializeTableFromTextFileV2(scope *Scope, table_handle tf.Output, filenam return scope.AddOperation(opspec) } +// Real-valued fast Fourier transform. +// +// Computes the 1-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most dimension of `input`. +// +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the +// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, +// followed by the `fft_length / 2` positive-frequency terms. +// +// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. +// +// Arguments: +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. +// +// Returns A complex64 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length / 2 + 1` unique +// frequency components of its 1D Fourier transform. 
+// +// @compatibility(numpy) +// Equivalent to np.fft.rfft +// @end_compatibility +func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "RFFT", + Input: []tf.Input{ + input, fft_length, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // QuantizedReluAttr is an optional argument to QuantizedRelu. type QuantizedReluAttr func(optionalAttr) @@ -15398,19 +15873,229 @@ func MaxPoolV2DataFormat(value string) MaxPoolV2Attr { // input tensor. // padding: The type of padding algorithm to use. // -// Returns The max pooled output tensor. -func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { +// Returns The max pooled output tensor. +func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPoolV2", + Input: []tf.Input{ + input, ksize, strides, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// SkipgramAttr is an optional argument to Skipgram. +type SkipgramAttr func(optionalAttr) + +// SkipgramWindowSize sets the optional window_size attribute to value. +// +// value: The number of words to predict to the left and right of the target. +// If not specified, defaults to 5 +func SkipgramWindowSize(value int64) SkipgramAttr { + return func(m optionalAttr) { + m["window_size"] = value + } +} + +// SkipgramMinCount sets the optional min_count attribute to value. +// +// value: The minimum number of word occurrences for it to be included in the +// vocabulary. 
+// If not specified, defaults to 5 +func SkipgramMinCount(value int64) SkipgramAttr { + return func(m optionalAttr) { + m["min_count"] = value + } +} + +// SkipgramSubsample sets the optional subsample attribute to value. +// +// value: Threshold for word occurrence. Words that appear with higher +// frequency will be randomly down-sampled. Set to 0 to disable. +// If not specified, defaults to 0.001 +func SkipgramSubsample(value float32) SkipgramAttr { + return func(m optionalAttr) { + m["subsample"] = value + } +} + +// Parses a text file and creates a batch of examples. +// +// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result +// +// Arguments: +// filename: The corpus's text file name. +// batch_size: The size of produced batch. +// +// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids. +func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Skipgram", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) +} + +// StringToNumberAttr is an optional argument to StringToNumber. +type StringToNumberAttr func(optionalAttr) + +// StringToNumberOutType sets the optional out_type attribute to value. +// +// value: The numeric type to interpret each string in `string_tensor` as. 
+// If not specified, defaults to DT_FLOAT +func StringToNumberOutType(value tf.DataType) StringToNumberAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Converts each string in the input Tensor to the specified numeric type. +// +// (Note that int32 overflow results in an error while float overflow +// results in a rounded value.) +// +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StringToNumber", + Input: []tf.Input{ + string_tensor, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. +type ResourceApplyFtrlV2Attr func(optionalAttr) + +// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the Ftrl-proximal scheme. +// +// grad_with_shrinkage = grad + 2 * l2_shrinkage * var +// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage +// linear += grad_with_shrinkage + +// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). 
+// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 shrinkage regularization. Must be a scalar. +// +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. +func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyFtrlV2", + Input: []tf.Input{ + var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// TruncatedNormalAttr is an optional argument to TruncatedNormal. +type TruncatedNormalAttr func(optionalAttr) + +// TruncatedNormalSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func TruncatedNormalSeed(value int64) TruncatedNormalAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// TruncatedNormalSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from a truncated normal distribution. +// +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. +// +// Arguments: +// shape: The shape of the output tensor. 
+// dtype: The type of the output. +// +// Returns A tensor of the specified shape filled with random truncated normal +// values. +func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"padding": padding} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolV2", + Type: "TruncatedNormal", Input: []tf.Input{ - input, ksize, strides, + shape, }, Attrs: attrs, } @@ -15799,739 +16484,392 @@ func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_in } op := scope.AddOperation(opspec) return op.Output(0), op.Output(1), op.Output(2) -} - -// Computes softplus: `log(exp(features) + 1)`. -func Softplus(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Softplus", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes exponential of x - 1 element-wise. -// -// I.e., \\(y = (\exp x) - 1\\). -func Expm1(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Expm1", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the number of records this Reader has produced. -// -// This is the same as the number of ReaderRead executions that have -// succeeded. -// -// Arguments: -// reader_handle: Handle to a Reader. -func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumRecordsProducedV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the sum along segments of a tensor. 
-// -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. -// -// Computes a tensor such that -// \\(output_i = \sum_j data_j\\) where sum is over `j` such -// that `segment_ids[j] == i`. -// -// If the sum is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// Arguments: -// -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentSum", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that emits the lines of one or more text files. -// -// Arguments: -// filenames: A scalar or a vector containing the name(s) of the file(s) to be -// read. -// compression_type: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// buffer_size: A scalar containing the number of bytes to buffer. -func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TextLineDataset", - Input: []tf.Input{ - filenames, compression_type, buffer_size, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CudnnRNNParamsSizeAttr is an optional argument to CudnnRNNParamsSize. -type CudnnRNNParamsSizeAttr func(optionalAttr) - -// CudnnRNNParamsSizeRnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNParamsSizeRnnMode(value string) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNParamsSizeInputMode sets the optional input_mode attribute to value. 
-// If not specified, defaults to "linear_input" -func CudnnRNNParamsSizeInputMode(value string) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNParamsSizeDirection sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNParamsSizeDirection(value string) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNParamsSizeDropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsSizeDropout(value float32) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNParamsSizeSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsSizeSeed(value int64) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNParamsSizeSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsSizeSeed2(value int64) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Computes size of weights that can be used by a Cudnn RNN model. -// -// Return the params size that can be used by the Cudnn RNN model. Subsequent -// weight allocation and initialization should use this size. -// -// num_layers: Specifies the number of layers in the RNN model. -// num_units: Specifies the size of the hidden state. -// input_size: Specifies the size of the input state. -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// The actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. 
-// dir = (direction == bidirectional) ? 2 : 1 -// dropout: dropout probability. When set to 0., dropout is disabled. -// seed: the 1st part of a seed to initialize dropout. -// seed2: the 2nd part of a seed to initialize dropout. -// params_size: The size of the params buffer that should be allocated and -// initialized for this RNN model. Note that this params buffer may not be -// compatible across GPUs. Please use CudnnRNNParamsWeights and -// CudnnRNNParamsBiases to save and restore them in a way that is compatible -// across different runs. -func CudnnRNNParamsSize(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, T tf.DataType, S tf.DataType, optional ...CudnnRNNParamsSizeAttr) (params_size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"T": T, "S": S} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNParamsSize", - Input: []tf.Input{ - num_layers, num_units, input_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes gradients for SparseSegmentMean. -// -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. -// -// Arguments: -// grad: gradient propagated to the SparseSegmentMean op. -// indices: indices passed to the corresponding SparseSegmentMean op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. -func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentMeanGrad", - Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the set of files matching one or more glob patterns. 
-// -// Note that this routine only supports wildcard characters in the -// basename portion of the pattern, not in the directory portion. -// Note also that the order of filenames returned can be non-deterministic. -// -// Arguments: -// pattern: Shell wildcard pattern(s). Scalar or vector of type string. -// -// Returns A vector of matching filenames. -func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { +} + +// Computes softplus: `log(exp(features) + 1)`. +func Softplus(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "MatchingFiles", + Type: "Softplus", Input: []tf.Input{ - pattern, + features, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the truth value of (x >= y) element-wise. +// Computes exponential of x - 1 element-wise. // -// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// I.e., \\(y = (\exp x) - 1\\). +func Expm1(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "GreaterEqual", + Type: "Expm1", Input: []tf.Input{ - x, y, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Conv3DAttr is an optional argument to Conv3D. -type Conv3DAttr func(optionalAttr) - -// Conv3DDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. 
-// If not specified, defaults to "NDHWC" -func Conv3DDataFormat(value string) Conv3DAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Conv3DDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func Conv3DDilations(value []int64) Conv3DAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes a 3-D convolution given 5-D `input` and `filter` tensors. -// -// In signal processing, cross-correlation is a measure of similarity of -// two waveforms as a function of a time-lag applied to one of them. This -// is also known as a sliding dot product or sliding inner-product. +// Returns the number of records this Reader has produced. // -// Our Conv3D implements a form of cross-correlation. +// This is the same as the number of ReaderRead executions that have +// succeeded. // // Arguments: -// input: Shape `[batch, in_depth, in_height, in_width, in_channels]`. -// filter: Shape `[filter_depth, filter_height, filter_width, in_channels, -// out_channels]`. `in_channels` must match between `input` and `filter`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) { +// reader_handle: Handle to a Reader. 
+func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Conv3D", + Type: "ReaderNumRecordsProducedV2", Input: []tf.Input{ - input, filter, + reader_handle, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Adds up a SparseTensor and a dense Tensor, using these special rules: +// Computes the sum along segments of a tensor. // -// (1) Broadcasts the dense side to have the same shape as the sparse side, if -// eligible; -// (2) Then, only the dense values pointed to by the indices of the SparseTensor -// participate in the cwise addition. +// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of +// segments. // -// By these rules, the result is a logical SparseTensor with exactly the same -// indices and shape, but possibly with different non-zero values. The output of -// this Op is the resultant non-zero values. +// Computes a tensor such that +// \\(output_i = \sum_j data_j\\) where sum is over `j` such +// that `segment_ids[j] == i`. +// +// If the sum is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
// // Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. // -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseDenseCwiseAdd", + Type: "SegmentSum", Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, + data, segment_ids, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Read an element from the TensorArray into output `value`. +// Creates a dataset that emits the lines of one or more text files. // // Arguments: -// handle: The handle to a TensorArray. -// -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. -// -// Returns The tensor that is read from the TensorArray. -func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { +// filenames: A scalar or a vector containing the name(s) of the file(s) to be +// read. +// compression_type: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// buffer_size: A scalar containing the number of bytes to buffer. 
+func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "TensorArrayReadV3", + Type: "TextLineDataset", Input: []tf.Input{ - handle, index, flow_in, + filenames, compression_type, buffer_size, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizeV2Attr is an optional argument to QuantizeV2. -type QuantizeV2Attr func(optionalAttr) - -// QuantizeV2Mode sets the optional mode attribute to value. -// If not specified, defaults to "MIN_COMBINED" -func QuantizeV2Mode(value string) QuantizeV2Attr { - return func(m optionalAttr) { - m["mode"] = value - } -} - -// QuantizeV2RoundMode sets the optional round_mode attribute to value. -// If not specified, defaults to "HALF_AWAY_FROM_ZERO" -func QuantizeV2RoundMode(value string) QuantizeV2Attr { - return func(m optionalAttr) { - m["round_mode"] = value - } -} +// CudnnRNNParamsSizeAttr is an optional argument to CudnnRNNParamsSize. +type CudnnRNNParamsSizeAttr func(optionalAttr) -// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. -// -// [min_range, max_range] are scalar floats that specify the range for -// the 'input' data. The 'mode' attribute controls exactly which calculations are -// used to convert the float values to their quantized equivalents. The -// 'round_mode' attribute controls which rounding tie-breaking algorithm is used -// when rounding float values to their quantized equivalents. 
-// -// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: -// -// ``` -// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -// if T == qint8, out[i] -= (range(T) + 1) / 2.0 -// ``` -// here `range(T) = numeric_limits::max() - numeric_limits::min()` -// -// *MIN_COMBINED Mode Example* -// -// Assume the input is type float and has a possible range of [0.0, 6.0] and the -// output type is quint8 ([0, 255]). The min_range and max_range values should be -// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each -// value of the input by 255/6 and cast to quint8. -// -// If the output type was qint8 ([-128, 127]), the operation will additionally -// subtract each value by 128 prior to casting, so that the range of values aligns -// with the range of qint8. -// -// If the mode is 'MIN_FIRST', then this approach is used: -// -// ``` -// num_discrete_values = 1 << (# of bits in T) -// range_adjust = num_discrete_values / (num_discrete_values - 1) -// range = (range_max - range_min) * range_adjust -// range_scale = num_discrete_values / range -// quantized = round(input * range_scale) - round(range_min * range_scale) + -// numeric_limits::min() -// quantized = max(quantized, numeric_limits::min()) -// quantized = min(quantized, numeric_limits::max()) -// ``` -// -// The biggest difference between this and MIN_COMBINED is that the minimum range -// is rounded first, before it's subtracted from the rounded value. With -// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing -// and dequantizing will introduce a larger and larger error. -// -// *SCALED mode Example* -// -// `SCALED` mode matches the quantization approach used in -// `QuantizeAndDequantize{V2|V3}`. 
-// -// If the mode is `SCALED`, we do not use the full range of the output type, -// choosing to elide the lowest possible value for symmetry (e.g., output range is -// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to -// 0. -// -// We first find the range of values in our tensor. The -// range we use is always centered on 0, so we find m such that -// ```c++ -// m = max(abs(input_min), abs(input_max)) -// ``` -// -// Our input tensor range is then `[-m, m]`. -// -// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. -// If T is signed, this is -// ``` -// num_bits = sizeof(T) * 8 -// [min_fixed, max_fixed] = -// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] -// ``` -// -// Otherwise, if T is unsigned, the fixed-point range is -// ``` -// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] -// ``` -// -// From this we compute our scaling factor, s: -// ```c++ -// s = (max_fixed - min_fixed) / (2 * m) -// ``` -// -// Now we can quantize the elements of our tensor: -// ```c++ -// result = round(input * s) -// ``` -// -// One thing to watch out for is that the operator may choose to adjust the -// requested minimum and maximum values slightly during the quantization process, -// so you should always use the output ports as the range for further calculations. -// For example, if the requested minimum and maximum values are close to equal, -// they will be separated by a small epsilon value to prevent ill-formed quantized -// buffers from being created. Otherwise, you can end up with buffers where all the -// quantized values map to the same float value, which causes problems for -// operations that have to perform further calculations on them. -// -// Arguments: -// -// min_range: The minimum scalar value possibly produced for the input. -// max_range: The maximum scalar value possibly produced for the input. +// CudnnRNNParamsSizeRnnMode sets the optional rnn_mode attribute to value. 
+// If not specified, defaults to "lstm" +func CudnnRNNParamsSizeRnnMode(value string) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["rnn_mode"] = value + } +} + +// CudnnRNNParamsSizeInputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNParamsSizeInputMode(value string) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["input_mode"] = value + } +} + +// CudnnRNNParamsSizeDirection sets the optional direction attribute to value. +// If not specified, defaults to "unidirectional" +func CudnnRNNParamsSizeDirection(value string) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["direction"] = value + } +} + +// CudnnRNNParamsSizeDropout sets the optional dropout attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsSizeDropout(value float32) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["dropout"] = value + } +} + +// CudnnRNNParamsSizeSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsSizeSeed(value int64) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// CudnnRNNParamsSizeSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsSizeSeed2(value int64) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Computes size of weights that can be used by a Cudnn RNN model. // +// Return the params size that can be used by the Cudnn RNN model. Subsequent +// weight allocation and initialization should use this size. // -// Returns The quantized data produced from the float input.The actual minimum scalar value used for the output.The actual maximum scalar value used for the output. 
-func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// num_layers: Specifies the number of layers in the RNN model. +// num_units: Specifies the size of the hidden state. +// input_size: Specifies the size of the input state. +// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicate whether there is a linear projection between the input and +// The actual computation before the first layer. 'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. +// direction: Indicates whether a bidirectional model will be used. +// dir = (direction == bidirectional) ? 2 : 1 +// dropout: dropout probability. When set to 0., dropout is disabled. +// seed: the 1st part of a seed to initialize dropout. +// seed2: the 2nd part of a seed to initialize dropout. +// params_size: The size of the params buffer that should be allocated and +// initialized for this RNN model. Note that this params buffer may not be +// compatible across GPUs. Please use CudnnRNNParamsWeights and +// CudnnRNNParamsBiases to save and restore them in a way that is compatible +// across different runs. 
+func CudnnRNNParamsSize(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, T tf.DataType, S tf.DataType, optional ...CudnnRNNParamsSizeAttr) (params_size tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"T": T} + attrs := map[string]interface{}{"T": T, "S": S} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizeV2", + Type: "CudnnRNNParamsSize", Input: []tf.Input{ - input, min_range, max_range, + num_layers, num_units, input_size, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Returns the truth value of (x < y) element-wise. +// Computes gradients for SparseSegmentMean. // -// *NOTE*: `Less` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Returns tensor "output" with same shape as grad, except for dimension 0 whose +// value is output_dim0. +// +// Arguments: +// grad: gradient propagated to the SparseSegmentMean op. +// indices: indices passed to the corresponding SparseSegmentMean op. +// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. +// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. +func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Less", + Type: "SparseSegmentMeanGrad", Input: []tf.Input{ - x, y, + grad, indices, segment_ids, output_dim0, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizedReluXAttr is an optional argument to QuantizedReluX. -type QuantizedReluXAttr func(optionalAttr) - -// QuantizedReluXOutType sets the optional out_type attribute to value. 
-// If not specified, defaults to DT_QUINT8 -func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` -// -// Arguments: +// Returns the set of files matching one or more glob patterns. // +// Note that this routine only supports wildcard characters in the +// basename portion of the pattern, not in the directory portion. +// Note also that the order of filenames returned can be non-deterministic. // -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. +// Arguments: +// pattern: Shell wildcard pattern(s). Scalar or vector of type string. // -// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. -func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { +// Returns A vector of matching filenames. +func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "QuantizedReluX", + Type: "MatchingFiles", Input: []tf.Input{ - features, max_value, min_features, max_features, + pattern, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// QuantizedConv2DAttr is an optional argument to QuantizedConv2D. -type QuantizedConv2DAttr func(optionalAttr) +// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth. 
+type HistogramFixedWidthAttr func(optionalAttr) -// QuantizedConv2DOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { +// HistogramFixedWidthDtype sets the optional dtype attribute to value. +// If not specified, defaults to DT_INT32 +func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr { return func(m optionalAttr) { - m["out_type"] = value + m["dtype"] = value } } -// QuantizedConv2DDilations sets the optional dilations attribute to value. +// Return histogram of values. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes a 2D convolution given quantized 4D input and filter tensors. +// Given the tensor `values`, this operation returns a rank 1 histogram counting +// the number of entries in `values` that fall into every bin. The bins are +// equal width and determined by the arguments `value_range` and `nbins`. // -// The inputs are quantized tensors where the lowest value represents the real -// number of the associated minimum, and the highest represents the maximum. -// This means that you can only interpret the quantized output in the same way, by -// taking the returned minimum and maximum values into account. 
+// ```python +// # Bins will be: (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf) +// nbins = 5 +// value_range = [0.0, 5.0] +// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15] // -// Arguments: +// with tf.get_default_session() as sess: +// hist = tf.histogram_fixed_width(new_values, value_range, nbins=5) +// variables.global_variables_initializer().run() +// sess.run(hist) => [2, 1, 1, 0, 2] +// ``` // -// filter: filter's input_depth dimension must match input's depth dimensions. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// min_filter: The float value that the lowest quantized filter value represents. -// max_filter: The float value that the highest quantized filter value represents. -// strides: The stride of the sliding window for each dimension of the input -// tensor. -// padding: The type of padding algorithm to use. +// Arguments: +// values: Numeric `Tensor`. +// value_range: Shape [2] `Tensor` of same `dtype` as `values`. +// values <= value_range[0] will be mapped to hist[0], +// values >= value_range[1] will be mapped to hist[-1]. +// nbins: Scalar `int32 Tensor`. Number of histogram bins. // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { +// Returns A 1-D `Tensor` holding histogram of values. 
+func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedConv2D", + Type: "HistogramFixedWidth", Input: []tf.Input{ - input, filter, min_input, max_input, min_filter, max_filter, + values, value_range, nbins, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// StatelessMultinomialAttr is an optional argument to StatelessMultinomial. -type StatelessMultinomialAttr func(optionalAttr) - -// StatelessMultinomialOutputDtype sets the optional output_dtype attribute to value. -// If not specified, defaults to DT_INT64 -func StatelessMultinomialOutputDtype(value tf.DataType) StatelessMultinomialAttr { - return func(m optionalAttr) { - m["output_dtype"] = value - } + return op.Output(0) } -// Draws samples from a multinomial distribution. -// -// Arguments: -// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` -// represents the unnormalized log probabilities for all classes. -// num_samples: 0-D. Number of independent samples to draw for each row slice. -// seed: 2 seeds (shape [2]). +// Returns the truth value of (x >= y) element-wise. // -// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` -// contains the drawn class labels with range `[0, num_classes)`. -func StatelessMultinomial(scope *Scope, logits tf.Output, num_samples tf.Output, seed tf.Output, optional ...StatelessMultinomialAttr) (output tf.Output) { +// *NOTE*: `GreaterEqual` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "StatelessMultinomial", + Type: "GreaterEqual", Input: []tf.Input{ - logits, num_samples, seed, + x, y, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Conv3DAttr is an optional argument to Conv3D. +type Conv3DAttr func(optionalAttr) + +// Conv3DDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func Conv3DDataFormat(value string) Conv3DAttr { + return func(m optionalAttr) { + m["data_format"] = value + } } -// ResourceGatherAttr is an optional argument to ResourceGather. -type ResourceGatherAttr func(optionalAttr) - -// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { +// Conv3DDilations sets the optional dilations attribute to value. +// +// value: 1-D tensor of length 5. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. 
+// If not specified, defaults to +func Conv3DDilations(value []int64) Conv3DAttr { return func(m optionalAttr) { - m["validate_indices"] = value + m["dilations"] = value } } -// Gather slices from the variable pointed to by `resource` according to `indices`. -// -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: +// Computes a 3-D convolution given 5-D `input` and `filter` tensors. // -// ```python -// # Scalar indices -// output[:, ..., :] = params[indices, :, ... :] +// In signal processing, cross-correlation is a measure of similarity of +// two waveforms as a function of a time-lag applied to one of them. This +// is also known as a sliding dot product or sliding inner-product. // -// # Vector indices -// output[i, :, ..., :] = params[indices[i], :, ... :] +// Our Conv3D implements a form of cross-correlation. // -// # Higher rank indices -// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] -// ``` -func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { +// Arguments: +// input: Shape `[batch, in_depth, in_height, in_width, in_channels]`. +// filter: Shape `[filter_depth, filter_height, filter_width, in_channels, +// out_channels]`. `in_channels` must match between `input` and `filter`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. 
+func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceGather", + Type: "Conv3D", Input: []tf.Input{ - resource, indices, + input, filter, }, Attrs: attrs, } @@ -16539,237 +16877,346 @@ func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype t return op.Output(0) } -// Delete the TensorArray from its resource container. +// Adds up a SparseTensor and a dense Tensor, using these special rules: // -// This enables the user to close and release the resource in the middle -// of a step/run. +// (1) Broadcasts the dense side to have the same shape as the sparse side, if +// eligible; +// (2) Then, only the dense values pointed to by the indices of the SparseTensor +// participate in the cwise addition. +// +// By these rules, the result is a logical SparseTensor with exactly the same +// indices and shape, but possibly with different non-zero values. The output of +// this Op is the resultant non-zero values. // // Arguments: -// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). +// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// dense: `R`-D. The dense Tensor operand. // -// Returns the created operation. -func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { +// Returns 1-D. The `N` values that are operated on. 
+func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorArrayCloseV3", + Type: "SparseDenseCwiseAdd", Input: []tf.Input{ - handle, + sp_indices, sp_values, sp_shape, dense, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Adds two `SparseTensor` objects to produce another `SparseTensor`. -// -// The input `SparseTensor` objects' indices are assumed ordered in standard -// lexicographic order. If this is not the case, before this step run -// `SparseReorder` to restore index ordering. +// Read an element from the TensorArray into output `value`. // -// By default, if two values sum to zero at some index, the output `SparseTensor` -// would still include that particular location in its index, storing a zero in the -// corresponding value slot. To override this, callers can specify `thresh`, -// indicating that if the sum has a magnitude strictly smaller than `thresh`, its -// corresponding value and index would then not be included. In particular, -// `thresh == 0` (default) means everything is kept and actual thresholding happens -// only for a positive value. +// Arguments: +// handle: The handle to a TensorArray. // -// In the following shapes, `nnz` is the count after taking `thresh` into account. +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. // -// Arguments: -// a_indices: 2-D. The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. -// a_values: 1-D. The `values` of the first `SparseTensor`, size `[nnz]` Vector. -// a_shape: 1-D. The `shape` of the first `SparseTensor`, size `[ndims]` Vector. -// b_indices: 2-D. The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix. -// b_values: 1-D. The `values` of the second `SparseTensor`, size `[nnz]` Vector. 
-// b_shape: 1-D. The `shape` of the second `SparseTensor`, size `[ndims]` Vector. -// thresh: 0-D. The magnitude threshold that determines if an output value/index -// pair takes space. -func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) { +// Returns The tensor that is read from the TensorArray. +func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "SparseAdd", + Type: "TensorArrayReadV3", Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh, + handle, index, flow_in, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// OrderedMapPeekAttr is an optional argument to OrderedMapPeek. -type OrderedMapPeekAttr func(optionalAttr) - -// OrderedMapPeekCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapPeekCapacity(value int64) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["capacity"] = value - } + return op.Output(0) } -// OrderedMapPeekMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapPeekMemoryLimit(value int64) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} +// QuantizeV2Attr is an optional argument to QuantizeV2. +type QuantizeV2Attr func(optionalAttr) -// OrderedMapPeekContainer sets the optional container attribute to value. 
-// If not specified, defaults to "" -func OrderedMapPeekContainer(value string) OrderedMapPeekAttr { +// QuantizeV2Mode sets the optional mode attribute to value. +// If not specified, defaults to "MIN_COMBINED" +func QuantizeV2Mode(value string) QuantizeV2Attr { return func(m optionalAttr) { - m["container"] = value + m["mode"] = value } } -// OrderedMapPeekSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapPeekSharedName(value string) OrderedMapPeekAttr { +// QuantizeV2RoundMode sets the optional round_mode attribute to value. +// If not specified, defaults to "HALF_AWAY_FROM_ZERO" +func QuantizeV2RoundMode(value string) QuantizeV2Attr { return func(m optionalAttr) { - m["shared_name"] = value + m["round_mode"] = value } } -// Op peeks at the values at the specified key. If the +// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. +// +// [min_range, max_range] are scalar floats that specify the range for +// the 'input' data. The 'mode' attribute controls exactly which calculations are +// used to convert the float values to their quantized equivalents. The +// 'round_mode' attribute controls which rounding tie-breaking algorithm is used +// when rounding float values to their quantized equivalents. +// +// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: +// +// ``` +// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) +// if T == qint8, out[i] -= (range(T) + 1) / 2.0 +// ``` +// here `range(T) = numeric_limits::max() - numeric_limits::min()` +// +// *MIN_COMBINED Mode Example* +// +// Assume the input is type float and has a possible range of [0.0, 6.0] and the +// output type is quint8 ([0, 255]). The min_range and max_range values should be +// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each +// value of the input by 255/6 and cast to quint8. 
+// +// If the output type was qint8 ([-128, 127]), the operation will additionally +// subtract each value by 128 prior to casting, so that the range of values aligns +// with the range of qint8. +// +// If the mode is 'MIN_FIRST', then this approach is used: +// +// ``` +// num_discrete_values = 1 << (# of bits in T) +// range_adjust = num_discrete_values / (num_discrete_values - 1) +// range = (range_max - range_min) * range_adjust +// range_scale = num_discrete_values / range +// quantized = round(input * range_scale) - round(range_min * range_scale) + +// numeric_limits::min() +// quantized = max(quantized, numeric_limits::min()) +// quantized = min(quantized, numeric_limits::max()) +// ``` +// +// The biggest difference between this and MIN_COMBINED is that the minimum range +// is rounded first, before it's subtracted from the rounded value. With +// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing +// and dequantizing will introduce a larger and larger error. +// +// *SCALED mode Example* +// +// `SCALED` mode matches the quantization approach used in +// `QuantizeAndDequantize{V2|V3}`. +// +// If the mode is `SCALED`, we do not use the full range of the output type, +// choosing to elide the lowest possible value for symmetry (e.g., output range is +// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to +// 0. +// +// We first find the range of values in our tensor. The +// range we use is always centered on 0, so we find m such that +// ```c++ +// m = max(abs(input_min), abs(input_max)) +// ``` +// +// Our input tensor range is then `[-m, m]`. +// +// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. 
+// If T is signed, this is +// ``` +// num_bits = sizeof(T) * 8 +// [min_fixed, max_fixed] = +// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] +// ``` +// +// Otherwise, if T is unsigned, the fixed-point range is +// ``` +// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] +// ``` +// +// From this we compute our scaling factor, s: +// ```c++ +// s = (max_fixed - min_fixed) / (2 * m) +// ``` +// +// Now we can quantize the elements of our tensor: +// ```c++ +// result = round(input * s) +// ``` +// +// One thing to watch out for is that the operator may choose to adjust the +// requested minimum and maximum values slightly during the quantization process, +// so you should always use the output ports as the range for further calculations. +// For example, if the requested minimum and maximum values are close to equal, +// they will be separated by a small epsilon value to prevent ill-formed quantized +// buffers from being created. Otherwise, you can end up with buffers where all the +// quantized values map to the same float value, which causes problems for +// operations that have to perform further calculations on them. +// +// Arguments: // -// underlying container does not contain this key -// this op will block until it does. This Op is optimized for -// performance. -func OrderedMapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapPeekAttr) (values []tf.Output) { +// min_range: The minimum scalar value possibly produced for the input. +// max_range: The maximum scalar value possibly produced for the input. +// +// +// Returns The quantized data produced from the float input.The actual minimum scalar value used for the output.The actual maximum scalar value used for the output. 
+func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{"T": T} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "OrderedMapPeek", + Type: "QuantizeV2", Input: []tf.Input{ - key, indices, + input, min_range, max_range, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Returns the truth value of (x < y) element-wise. +// +// *NOTE*: `Less` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("OrderedMapPeek", err) - return + opspec := tf.OpSpec{ + Type: "Less", + Input: []tf.Input{ + x, y, + }, } - return values + op := scope.AddOperation(opspec) + return op.Output(0) } -// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg. -type DecodeAndCropJpegAttr func(optionalAttr) +// QuantizedReluXAttr is an optional argument to QuantizedReluX. +type QuantizedReluXAttr func(optionalAttr) -// DecodeAndCropJpegChannels sets the optional channels attribute to value. -// -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr { +// QuantizedReluXOutType sets the optional out_type attribute to value. 
+// If not specified, defaults to DT_QUINT8 +func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr { return func(m optionalAttr) { - m["channels"] = value + m["out_type"] = value } } -// DecodeAndCropJpegRatio sets the optional ratio attribute to value. +// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` // -// value: Downscaling ratio. -// If not specified, defaults to 1 -func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["ratio"] = value +// Arguments: +// +// +// min_features: The float value that the lowest quantized value represents. +// max_features: The float value that the highest quantized value represents. +// +// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. +func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) } + opspec := tf.OpSpec{ + Type: "QuantizedReluX", + Input: []tf.Input{ + features, max_value, min_features, max_features, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. -// -// value: If true use a slower but nicer upscaling of the -// chroma planes (yuv420/422 only). -// If not specified, defaults to true -func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr { +// QuantizedConv2DAttr is an optional argument to QuantizedConv2D. +type QuantizedConv2DAttr func(optionalAttr) + +// QuantizedConv2DOutType sets the optional out_type attribute to value. 
+// If not specified, defaults to DT_QINT32 +func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { return func(m optionalAttr) { - m["fancy_upscaling"] = value + m["out_type"] = value } } -// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. +// QuantizedConv2DDilations sets the optional dilations attribute to value. // -// value: If true try to recover an image from truncated input. -// If not specified, defaults to false -func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr { +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr { return func(m optionalAttr) { - m["try_recover_truncated"] = value + m["dilations"] = value } } -// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. +// Computes a 2D convolution given quantized 4D input and filter tensors. // -// value: The minimum required fraction of lines before a truncated -// input is accepted. -// If not specified, defaults to 1 -func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["acceptable_fraction"] = value +// The inputs are quantized tensors where the lowest value represents the real +// number of the associated minimum, and the highest represents the maximum. +// This means that you can only interpret the quantized output in the same way, by +// taking the returned minimum and maximum values into account. +// +// Arguments: +// +// filter: filter's input_depth dimension must match input's depth dimensions. 
+// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// min_filter: The float value that the lowest quantized filter value represents. +// max_filter: The float value that the highest quantized filter value represents. +// strides: The stride of the sliding window for each dimension of the input +// tensor. +// padding: The type of padding algorithm to use. +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizedConv2D", + Input: []tf.Input{ + input, filter, min_input, max_input, min_filter, max_filter, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value. -// -// value: string specifying a hint about the algorithm used for -// decompression. Defaults to "" which maps to a system-specific -// default. Currently valid values are ["INTEGER_FAST", -// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal -// jpeg library changes to a version that does not have that specific -// option.) -// If not specified, defaults to "" -func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr { +// StatelessMultinomialAttr is an optional argument to StatelessMultinomial. 
+type StatelessMultinomialAttr func(optionalAttr) + +// StatelessMultinomialOutputDtype sets the optional output_dtype attribute to value. +// If not specified, defaults to DT_INT64 +func StatelessMultinomialOutputDtype(value tf.DataType) StatelessMultinomialAttr { return func(m optionalAttr) { - m["dct_method"] = value + m["output_dtype"] = value } } -// Decode and Crop a JPEG-encoded image to a uint8 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the JPEG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// -// If needed, the JPEG-encoded image is transformed to match the requested number -// of color channels. -// -// The attr `ratio` allows downscaling the image by an integer factor during -// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than -// downscaling the image later. -// -// -// It is equivalent to a combination of decode and crop, but much faster by only -// decoding partial jpeg image. +// Draws samples from a multinomial distribution. // // Arguments: -// contents: 0-D. The JPEG-encoded image. -// crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. +// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` +// represents the unnormalized log probabilities for all classes. +// num_samples: 0-D. Number of independent samples to draw for each row slice. +// seed: 2 seeds (shape [2]). // -// Returns 3-D with shape `[height, width, channels]`.. -func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) { +// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` +// contains the drawn class labels with range `[0, num_classes)`. 
+func StatelessMultinomial(scope *Scope, logits tf.Output, num_samples tf.Output, seed tf.Output, optional ...StatelessMultinomialAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -16778,9 +17225,9 @@ func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeAndCropJpeg", + Type: "StatelessMultinomial", Input: []tf.Input{ - contents, crop_window, + logits, num_samples, seed, }, Attrs: attrs, } @@ -16788,76 +17235,71 @@ func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, return op.Output(0) } -// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. -type AllCandidateSamplerAttr func(optionalAttr) - -// AllCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} +// ResourceGatherAttr is an optional argument to ResourceGather. +type ResourceGatherAttr func(optionalAttr) -// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr { +// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { return func(m optionalAttr) { - m["seed2"] = value + m["validate_indices"] = value } } -// Generates labels for candidate sampling with a learned unigram distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. 
+// Gather slices from the variable pointed to by `resource` according to `indices`. // -// For each batch, this op picks a single set of sampled candidate labels. +// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). +// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: // -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. +// ```python +// # Scalar indices +// output[:, ..., :] = params[indices, :, ... :] // -// Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to produce. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. +// # Vector indices +// output[i, :, ..., :] = params[indices[i], :, ... :] // -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. 
-func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { +// # Higher rank indices +// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] +// ``` +func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AllCandidateSampler", + Type: "ResourceGather", Input: []tf.Input{ - true_classes, + resource, indices, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) +} + +// Delete the TensorArray from its resource container. +// +// This enables the user to close and release the resource in the middle +// of a step/run. +// +// Arguments: +// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). +// +// Returns the created operation. +func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorArrayCloseV3", + Input: []tf.Input{ + handle, + }, + } + return scope.AddOperation(opspec) } // Saves the input tensors to disk. @@ -18964,242 +19406,32 @@ func RandomUniformIntSeed2(value int64) RandomUniformIntAttr { // Outputs random integers from a uniform distribution. // -// The generated values are uniform integers in the range `[minval, maxval)`. -// The lower bound `minval` is included in the range, while the upper bound -// `maxval` is excluded. -// -// The random integers are slightly biased unless `maxval - minval` is an exact -// power of two. 
The bias is small for values of `maxval - minval` significantly -// smaller than the range of the output (either `2^32` or `2^64`). -// -// Arguments: -// shape: The shape of the output tensor. -// minval: 0-D. Inclusive lower bound on the generated integers. -// maxval: 0-D. Exclusive upper bound on the generated integers. -// -// Returns A tensor of the specified shape filled with uniform random integers. -func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomUniformInt", - Input: []tf.Input{ - shape, minval, maxval, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SkipgramAttr is an optional argument to Skipgram. -type SkipgramAttr func(optionalAttr) - -// SkipgramWindowSize sets the optional window_size attribute to value. -// -// value: The number of words to predict to the left and right of the target. -// If not specified, defaults to 5 -func SkipgramWindowSize(value int64) SkipgramAttr { - return func(m optionalAttr) { - m["window_size"] = value - } -} - -// SkipgramMinCount sets the optional min_count attribute to value. -// -// value: The minimum number of word occurrences for it to be included in the -// vocabulary. -// If not specified, defaults to 5 -func SkipgramMinCount(value int64) SkipgramAttr { - return func(m optionalAttr) { - m["min_count"] = value - } -} - -// SkipgramSubsample sets the optional subsample attribute to value. -// -// value: Threshold for word occurrence. Words that appear with higher -// frequency will be randomly down-sampled. Set to 0 to disable. 
-// If not specified, defaults to 0.001 -func SkipgramSubsample(value float32) SkipgramAttr { - return func(m optionalAttr) { - m["subsample"] = value - } -} - -// Parses a text file and creates a batch of examples. -// -// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result -// -// Arguments: -// filename: The corpus's text file name. -// batch_size: The size of produced batch. -// -// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids. -func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Skipgram", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) -} - -// StringToNumberAttr is an optional argument to StringToNumber. -type StringToNumberAttr func(optionalAttr) - -// StringToNumberOutType sets the optional out_type attribute to value. -// -// value: The numeric type to interpret each string in `string_tensor` as. -// If not specified, defaults to DT_FLOAT -func StringToNumberOutType(value tf.DataType) StringToNumberAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Converts each string in the input Tensor to the specified numeric type. -// -// (Note that int32 overflow results in an error while float overflow -// results in a rounded value.) 
-// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringToNumber", - Input: []tf.Input{ - string_tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. -type ResourceApplyFtrlV2Attr func(optionalAttr) - -// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the Ftrl-proximal scheme. -// -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regulariation. Must be a scalar. -// l2: L2 shrinkage regulariation. Must be a scalar. -// -// lr_power: Scaling factor. Must be a scalar. -// -// Returns the created operation. 
-func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyFtrlV2", - Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// TruncatedNormalAttr is an optional argument to TruncatedNormal. -type TruncatedNormalAttr func(optionalAttr) - -// TruncatedNormalSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func TruncatedNormalSeed(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// TruncatedNormalSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from a truncated normal distribution. -// -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. +// The generated values are uniform integers in the range `[minval, maxval)`. +// The lower bound `minval` is included in the range, while the upper bound +// `maxval` is excluded. +// +// The random integers are slightly biased unless `maxval - minval` is an exact +// power of two. 
The bias is small for values of `maxval - minval` significantly +// smaller than the range of the output (either `2^32` or `2^64`). // // Arguments: // shape: The shape of the output tensor. -// dtype: The type of the output. +// minval: 0-D. Inclusive lower bound on the generated integers. +// maxval: 0-D. Exclusive upper bound on the generated integers. // -// Returns A tensor of the specified shape filled with random truncated normal -// values. -func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { +// Returns A tensor of the specified shape filled with uniform random integers. +func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TruncatedNormal", + Type: "RandomUniformInt", Input: []tf.Input{ - shape, + shape, minval, maxval, }, Attrs: attrs, } @@ -19325,113 +19557,6 @@ func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...Or return op.Output(0) } -// DecodeRawAttr is an optional argument to DecodeRaw. -type DecodeRawAttr func(optionalAttr) - -// DecodeRawLittleEndian sets the optional little_endian attribute to value. -// -// value: Whether the input `bytes` are in little-endian order. -// Ignored for `out_type` values that are stored in a single byte like -// `uint8`. -// If not specified, defaults to true -func DecodeRawLittleEndian(value bool) DecodeRawAttr { - return func(m optionalAttr) { - m["little_endian"] = value - } -} - -// Reinterpret the bytes of a string as a vector of numbers. -// -// Arguments: -// bytes: All the elements must have the same length. -// -// -// Returns A Tensor with one more dimension than the input `bytes`. 
The -// added dimension will have size equal to the length of the elements -// of `bytes` divided by the number of bytes to represent `out_type`. -func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"out_type": out_type} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeRaw", - Input: []tf.Input{ - bytes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Copy a tensor setting everything outside a central band in each innermost matrix -// -// to zero. -// -// The `band` part is computed as follows: -// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a -// tensor with the same shape where -// -// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`. -// -// The indicator function -// -// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && -// (num_upper < 0 || (n-m) <= num_upper)`. -// -// For example: -// -// ``` -// # if 'input' is [[ 0, 1, 2, 3] -// [-1, 0, 1, 2] -// [-2, -1, 0, 1] -// [-3, -2, -1, 0]], -// -// tf.matrix_band_part(input, 1, -1) ==> [[ 0, 1, 2, 3] -// [-1, 0, 1, 2] -// [ 0, -1, 0, 1] -// [ 0, 0, -1, 0]], -// -// tf.matrix_band_part(input, 2, 1) ==> [[ 0, 1, 0, 0] -// [-1, 0, 1, 0] -// [-2, -1, 0, 1] -// [ 0, -2, -1, 0]] -// ``` -// -// Useful special cases: -// -// ``` -// tf.matrix_band_part(input, 0, -1) ==> Upper triangular part. -// tf.matrix_band_part(input, -1, 0) ==> Lower triangular part. -// tf.matrix_band_part(input, 0, 0) ==> Diagonal. -// ``` -// -// Arguments: -// input: Rank `k` tensor. -// num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire -// lower triangle. -// num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep -// entire upper triangle. -// -// Returns Rank `k` tensor of the same shape as input. 
The extracted banded tensor. -func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixBandPart", - Input: []tf.Input{ - input, num_lower, num_upper, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Counts the number of occurrences of each value in an integer array. // // Outputs a vector with length `size` and the same dtype as `weights`. If @@ -21159,7 +21284,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { // generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. // // The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. +// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. // Each element must be in the range `[0, 255]` (It represents the value of a // pixel in the output image). Non-finite values in the input tensor are // replaced by this tensor in the output image. The default value is the color @@ -30569,128 +30694,3 @@ func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values op := scope.AddOperation(opspec) return op.Output(0) } - -// Gather slices from `params` into a Tensor with shape specified by `indices`. -// -// `indices` is an K-dimensional integer tensor, best thought of as a -// (K-1)-dimensional tensor of indices into `params`, where each element defines a -// slice of `params`: -// -// output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]] -// -// Whereas in @{tf.gather} `indices` defines slices into the first -// dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the -// first `N` dimensions of `params`, where `N = indices.shape[-1]`. 
-// -// The last dimension of `indices` can be at most the rank of -// `params`: -// -// indices.shape[-1] <= params.rank -// -// The last dimension of `indices` corresponds to elements -// (if `indices.shape[-1] == params.rank`) or slices -// (if `indices.shape[-1] < params.rank`) along dimension `indices.shape[-1]` -// of `params`. The output tensor has shape -// -// indices.shape[:-1] + params.shape[indices.shape[-1]:] -// -// Note that on CPU, if an out of bound index is found, an error is returned. -// On GPU, if an out of bound index is found, a 0 is stored in the -// corresponding output value. -// -// Some examples below. -// -// Simple indexing into a matrix: -// -// ```python -// indices = [[0, 0], [1, 1]] -// params = [['a', 'b'], ['c', 'd']] -// output = ['a', 'd'] -// ``` -// -// Slice indexing into a matrix: -// -// ```python -// indices = [[1], [0]] -// params = [['a', 'b'], ['c', 'd']] -// output = [['c', 'd'], ['a', 'b']] -// ``` -// -// Indexing into a 3-tensor: -// -// ```python -// indices = [[1]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = [[['a1', 'b1'], ['c1', 'd1']]] -// -// -// indices = [[0, 1], [1, 0]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = [['c0', 'd0'], ['a1', 'b1']] -// -// -// indices = [[0, 0, 1], [1, 0, 1]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = ['b0', 'b1'] -// ``` -// -// Batched indexing into a matrix: -// -// ```python -// indices = [[[0, 0]], [[0, 1]]] -// params = [['a', 'b'], ['c', 'd']] -// output = [['a'], ['b']] -// ``` -// -// Batched slice indexing into a matrix: -// -// ```python -// indices = [[[1]], [[0]]] -// params = [['a', 'b'], ['c', 'd']] -// output = [[['c', 'd']], [['a', 'b']]] -// ``` -// -// Batched indexing into a 3-tensor: -// -// ```python -// indices = [[[1]], [[0]]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = 
[[[['a1', 'b1'], ['c1', 'd1']]], -// [[['a0', 'b0'], ['c0', 'd0']]]] -// -// indices = [[[0, 1], [1, 0]], [[0, 0], [1, 1]]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = [[['c0', 'd0'], ['a1', 'b1']], -// [['a0', 'b0'], ['c1', 'd1']]] -// -// -// indices = [[[0, 0, 1], [1, 0, 1]], [[0, 1, 1], [1, 1, 0]]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = [['b0', 'b1'], ['d0', 'c1']] -// ``` -// -// Arguments: -// params: The tensor from which to gather values. -// indices: Index tensor. -// -// Returns Values from `params` gathered from indices given by `indices`, with -// shape `indices.shape[:-1] + params.shape[indices.shape[-1]:]`. -func GatherNd(scope *Scope, params tf.Output, indices tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GatherNd", - Input: []tf.Input{ - params, indices, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} -- GitLab From 8fa27b1903ceedb25da5649aa17160866dda734d Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Mon, 23 Apr 2018 22:08:52 -0700 Subject: [PATCH 188/434] docs: Clean up install_linux with pip --- tensorflow/docs_src/install/install_linux.md | 342 ++++++++----------- 1 file changed, 151 insertions(+), 191 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index b7b0fc7d3d..9b431e49ee 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -103,248 +103,203 @@ the specified versions. If upgrading is not possible, then you may still run TensorFlow with GPU support, if you @{$install_sources$install TensorFlow from Sources}. -## Determine how to install TensorFlow - -You must pick the mechanism by which you install TensorFlow. 
The -supported choices are as follows: - - * [Virtualenv](#InstallingVirtualenv) - * ["native" pip](#InstallingNativePip) - * [Docker](#InstallingDocker) - * [Anaconda](#InstallingAnaconda) - * installing from sources, which is documented in - [a separate guide](https://www.tensorflow.org/install/install_sources). - -**We recommend the Virtualenv installation.** -[Virtualenv](https://virtualenv.pypa.io/en/stable/) -is a virtual Python environment isolated from other Python development, -incapable of interfering with or being affected by other Python programs -on the same machine. During the Virtualenv installation process, -you will install not only TensorFlow but also all the packages that -TensorFlow requires. (This is actually pretty easy.) -To start working with TensorFlow, you simply need to "activate" the -virtual environment. All in all, Virtualenv provides a safe and -reliable mechanism for installing and running TensorFlow. - -Native pip installs TensorFlow directly on your system without going -through any container system. **We recommend the native pip install for -system administrators aiming to make TensorFlow available to everyone on a -multi-user system.** Since a native pip installation is not walled-off in -a separate container, the pip installation might interfere with other -Python-based installations on your system. However, if you understand pip -and your Python environment, a native pip installation often entails only -a single command. +## How to install TensorFlow -Docker completely isolates the TensorFlow installation -from pre-existing packages on your machine. The Docker container contains -TensorFlow and all its dependencies. Note that the Docker image can be quite -large (hundreds of MBs). You might choose the Docker installation if you are -incorporating TensorFlow into a larger application architecture that already -uses Docker. - -In Anaconda, you may use conda to create a virtual environment. 
-However, within Anaconda, we recommend installing TensorFlow with the -`pip install` command, not with the `conda install` command. - -**NOTE:** The conda package is community supported, not officially supported. -That is, the TensorFlow team neither tests nor maintains the conda package. -Use that package at your own risk. +There are a few options to install TensorFlow on your machine: +* [Use pip in a virtual environment](#InstallingVirtualenv) *(recommended)* +* [Use pip in your system environment](#InstallingNativePip) +* [Configure a Docker container](#InstallingDocker) +* [Use pip in Anaconda](#InstallingAnaconda) +* [Install TensorFlow from source](/install/install_sources) -## Installing with Virtualenv - -Take the following steps to install TensorFlow with Virtualenv: - - 1. Install pip and Virtualenv by issuing one of the following commands: - -
$ sudo apt-get install python-pip python-dev python-virtualenv # for Python 2.7
-    $ sudo apt-get install python3-pip python3-dev python-virtualenv # for Python 3.n
- - 2. Create a Virtualenv environment by issuing one of the following commands: +### Use `pip` in a virtual environment -
$ virtualenv --system-site-packages targetDirectory # for Python 2.7
-    $ virtualenv --system-site-packages -p python3 targetDirectory # for Python 3.n
+This is the *recommended* install method. The +[Virtualenv](https://virtualenv.pypa.io/en/stable/) tool creates virtual Python +environments that are isolated from other Python development on the same machine. +In this scenario, you install TensorFlow and its dependencies within a virtual +environment that is available when *activated*. Virtualenv provides a reliable +way to install and run TensorFlow while avoiding conflicts with the rest of the +system. - where targetDirectory specifies the top of the - Virtualenv tree. Our instructions assume that - targetDirectory is `~/tensorflow`, but you may - choose any directory. +1\. On Ubuntu, install the `pip` and `virtualenv` packages: - 3. Activate the Virtualenv environment by issuing one of the following - commands: - -
$ source ~/tensorflow/bin/activate # bash, sh, ksh, or zsh
-    $ source ~/tensorflow/bin/activate.csh  # csh or tcsh
-    $ . ~/tensorflow/bin/activate.fish  # fish
- - The preceding source command should change your prompt - to the following: - -
(tensorflow)$ 
- - 4. Ensure pip ≥8.1 is installed: - -
(tensorflow)$ easy_install -U pip
+
+  sudo apt-get install python-pip python-dev python-virtualenv   # for Python 2.7
+  sudo apt-get install python3-pip python3-dev python-virtualenv # for Python 3.n
+
- 5. Issue one of the following commands to install TensorFlow in the active - Virtualenv environment: +2\. Create a directory for the virtual environment and choose a Python +interpreter: -
(tensorflow)$ pip install --upgrade tensorflow      # for Python 2.7
-    (tensorflow)$ pip3 install --upgrade tensorflow     # for Python 3.n
-    (tensorflow)$ pip install --upgrade tensorflow-gpu  # for Python 2.7 and GPU
-    (tensorflow)$ pip3 install --upgrade tensorflow-gpu # for Python 3.n and GPU
+
+  mkdir ~/tensorflow  # somewhere to work out of
+  cd ~/tensorflow
+  # Choose one of the following Python environments for the ./venv directory:
+  virtualenv --system-site-packages venv            # Use python default (Python 2.7)
+  virtualenv --system-site-packages -p python3 venv # Use Python 3.n
+
- If the above command succeeds, skip Step 6. If the preceding - command fails, perform Step 6. +3\. Activate the Virtualenv environment using one of these shell commands: - 6. (Optional) If Step 5 failed (typically because you invoked a pip version - lower than 8.1), install TensorFlow in the active Virtualenv environment - by issuing a command of the following format: +
+  source ~/tensorflow/venv/bin/activate      # bash, sh, ksh, or zsh
+  source ~/tensorflow/venv/bin/activate.csh  # csh or tcsh
+  . ~/tensorflow/venv/bin/activate.fish      # fish
+
-
(tensorflow)$ pip install --upgrade tfBinaryURL   # Python 2.7
-    (tensorflow)$ pip3 install --upgrade tfBinaryURL  # Python 3.n 
+When the Virtualenv is activated, the shell prompt displays as `(venv) $`. - where tfBinaryURL identifies the URL of the - TensorFlow Python package. The appropriate value of - tfBinaryURLdepends on the operating system, - Python version, and GPU support. Find the appropriate value for - tfBinaryURL for your system - [here](#the_url_of_the_tensorflow_python_package). For example, if you - are installing TensorFlow for Linux, Python 3.4, and CPU-only support, - issue the following command to install TensorFlow in the active - Virtualenv environment: +4\. Upgrade `pip` in your virtual environment: -
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
+See the [pip installation guide](https://pip.pypa.io/en/stable/installing/) for +instructions, or use `easy_install`: -If you encounter installation problems, see -[Common Installation Problems](#common_installation_problems). +
+(venv)$ easy_install -U pip
+
+5\. Within an active Virtualenv environment, use one of the following `pip` +commands to install the TensorFlow package: -### Next Steps +
+(venv)$ pip install --upgrade tensorflow      # for Python 2.7
+(venv)$ pip3 install --upgrade tensorflow     # for Python 3.n
+(venv)$ pip install --upgrade tensorflow-gpu  # for Python 2.7 and GPU
+
-After installing TensorFlow, -[validate the installation](#ValidateYourInstallation). +Success! TensorFlow is now installed. -Note that you must activate the Virtualenv environment each time you -use TensorFlow. If the Virtualenv environment is not currently active, -invoke one of the following commands: +Use `pip list` to show the packages installed in the virtual environment. +[Validate the install](#ValidateYourInstallation) and test the version: -
$ source ~/tensorflow/bin/activate      # bash, sh, ksh, or zsh
-$ source ~/tensorflow/bin/activate.csh  # csh or tcsh
+
+(venv)$ python -c "import tensorflow as tf; print(tf.__version__)"
+
-When the Virtualenv environment is active, you may run -TensorFlow programs from this shell. Your prompt will become -the following to indicate that your tensorflow environment is active: +Use the `deactivate` command to stop the Python virtual environment. -
(tensorflow)$ 
+#### Problems -When you are done using TensorFlow, you may deactivate the -environment by invoking the `deactivate` function as follows: +If the above steps failed, try installing the TensorFlow binary using the remote +URL of the `pip` package: -
(tensorflow)$ deactivate 
+
+(venv)$ pip install --upgrade remote-pkg-URL   # Python 2.7
+(venv)$ pip3 install --upgrade remote-pkg-URL  # Python 3.n
+
-The prompt will revert back to your default prompt (as defined by the -`PS1` environment variable). +The remote-pkg-URL depends on the operating system, Python version, +and GPU support. See [here](#the_url_of_the_tensorflow_python_package) for the +URL naming scheme and location. +See [Common Installation Problems](#common_installation_problems) if you +encounter problems. -### Uninstalling TensorFlow +#### Uninstall TensorFlow -To uninstall TensorFlow, simply remove the tree you created. -For example: +To uninstall TensorFlow, remove the Virtualenv directory you created in step 2: -
$ rm -r targetDirectory 
+
+  deactivate  # stop the virtualenv
+  rm -r ~/tensorflow/venv
+
-## Installing with native pip - -You may install TensorFlow through pip, choosing between a simple -installation procedure or a more complex one. +### Use `pip` in your system environment -**Note:** The -[REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py) -lists the TensorFlow packages that pip will install or upgrade. +Use `pip` to install the TensorFlow package directly on your system without +using a container or virtual environment for isolation. This method is +recommended for system administrators that want a TensorFlow installation that is +available to everyone on a multi-user system. +Since a system install is not isolated, it could interfere with other +Python-based installations. But if you understand `pip` and your Python +environment, a system `pip` install is straightforward. -### Prerequisite: Python and Pip +See the +[REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py) +for a list of TensorFlow packages that `pip` installs or upgrades. -Python is automatically installed on Ubuntu. Take a moment to confirm -(by issuing a `python -V` command) that one of the following Python -versions is already installed on your system: - * Python 2.7 - * Python 3.4+ +#### Install Python and `pip` -The pip or pip3 package manager is *usually* installed on Ubuntu. Take a -moment to confirm (by issuing a `pip -V` or `pip3 -V` command) -that pip or pip3 is installed. We strongly recommend version 8.1 or higher -of pip or pip3. If Version 8.1 or later is not installed, issue the -following command, which will either install or upgrade to the latest -pip version: +On Ubuntu, Python is automatically installed and `pip` is *usually* installed. +Confirm the `python` and `pip` versions: -
$ sudo apt-get install python-pip python-dev   # for Python 2.7
-$ sudo apt-get install python3-pip python3-dev # for Python 3.n
+
+  python -V
+  pip -V  # or: pip3 -V
 
+We *strongly recommend* `pip` or `pip3` version 8.1 or higher. If using a release +before version 8.1, upgrade `pip`: -### Install TensorFlow - -Assuming the prerequisite software is installed on your Linux host, -take the following steps: +
+  sudo apt-get install python-pip python-dev   # for Python 2.7
+  sudo apt-get install python3-pip python3-dev # for Python 3.n
+
- 1. Install TensorFlow by invoking **one** of the following commands: -
$ pip install tensorflow      # Python 2.7; CPU support (no GPU support)
-    $ pip3 install tensorflow     # Python 3.n; CPU support (no GPU support)
-    $ pip install tensorflow-gpu  # Python 2.7;  GPU support
-    $ pip3 install tensorflow-gpu # Python 3.n; GPU support 
+#### Install TensorFlow - If the preceding command runs to completion, you should now - [validate your installation](#ValidateYourInstallation). +Install one of the available TensorFlow packages: - 2. (Optional.) If Step 1 failed, install the latest version of TensorFlow - by issuing a command of the following format: +
+  # Select one:
+  sudo pip install tensorflow      # Python 2.7 CPU (no GPU support)
+  sudo pip3 install tensorflow     # Python 3.n CPU (no GPU support)
+  sudo pip install tensorflow-gpu  # Python 2.7 GPU support
+  sudo pip3 install tensorflow-gpu # Python 3.n GPU support
+
-
$ sudo pip  install --upgrade tfBinaryURL   # Python 2.7
-    $ sudo pip3 install --upgrade tfBinaryURL   # Python 3.n 
+Success! TensorFlow is now installed. - where tfBinaryURL identifies the URL of the - TensorFlow Python package. The appropriate value of - tfBinaryURL depends on the operating system, - Python version, and GPU support. Find the appropriate value for - tfBinaryURL - [here](#the_url_of_the_tensorflow_python_package). For example, to - install TensorFlow for Linux, Python 3.4, and CPU-only support, issue - the following command: +Use `pip list` to show the packages installed on the system. +[Validate the install](#ValidateYourInstallation) and test the version: -
-     $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
-     
+
+  python -c "import tensorflow as tf; print(tf.__version__)"
+
- If this step fails, see - [Common Installation Problems](#common_installation_problems). +#### Problems +If the above steps failed, try installing the TensorFlow binary using the remote +URL of the `pip` package: -### Next Steps +
+  sudo pip install --upgrade remote-pkg-URL   # Python 2.7
+  sudo pip3 install --upgrade remote-pkg-URL  # Python 3.n
+
+ -After installing TensorFlow, [validate your installation](#ValidateYourInstallation). +The remote-pkg-URL depends on the operating system, Python version, +and GPU support. See [here](#the_url_of_the_tensorflow_python_package) for the +URL naming scheme and location. +See [Common Installation Problems](#common_installation_problems) if you +encounter problems. -### Uninstalling TensorFlow +#### Uninstall TensorFlow -To uninstall TensorFlow, issue one of following commands: +To uninstall TensorFlow on your system, use one of the following commands: -
-$ sudo pip uninstall tensorflow  # for Python 2.7
-$ sudo pip3 uninstall tensorflow # for Python 3.n
+
+  sudo pip uninstall tensorflow   # for Python 2.7
+  sudo pip3 uninstall tensorflow  # for Python 3.n
 
- -## Installing with Docker +### Configure a Docker container + +Docker completely isolates the TensorFlow installation +from pre-existing packages on your machine. The Docker container contains +TensorFlow and all its dependencies. Note that the Docker image can be quite +large (hundreds of MBs). You might choose the Docker installation if you are +incorporating TensorFlow into a larger application architecture that already +uses Docker. Take the following steps to install TensorFlow through Docker: @@ -364,7 +319,7 @@ Take the following steps to install TensorFlow through Docker: The remainder of this section explains how to launch a Docker container. -### CPU-only +#### CPU-only To launch a Docker container with CPU-only support (that is, without GPU support), enter a command of the following format: @@ -414,7 +369,7 @@ $ docker run -it -p 8888:8888 tensorflow/tensorflow Docker will download the TensorFlow binary image the first time you launch it. -### GPU support +#### GPU support Prior to installing TensorFlow with GPU support, ensure that your system meets all [NVIDIA software requirements](#NVIDIARequirements). To launch a Docker container @@ -470,14 +425,22 @@ For more details see the [TensorFlow docker readme](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/docker). -### Next Steps +#### Next Steps You should now [validate your installation](#ValidateYourInstallation). -## Installing with Anaconda +### Use `pip` in Anaconda + +Anaconda provides the `conda` utility to create a virtual environment. However, +within Anaconda, we recommend installing TensorFlow using the `pip install` +command and *not* with the `conda install` command. + +Caution: `conda` is a community supported package that is not officially +maintained by the TensorFlow team. Use this package at your own risk since it is +not tested on new TensorFlow releases. 
Take the following steps to install TensorFlow in an Anaconda environment: @@ -563,10 +526,7 @@ installation problems](#common_installation_problems). If you are new to machine learning, we recommend the following: * [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course) -* @{$get_started/get_started_for_beginners$Getting Started for ML Beginners} - -If you are experienced with machine learning but new to TensorFlow, see -@{$get_started/premade_estimators$Getting Started with TensorFlow}. +* @{$get_started/eager} ## Common installation problems @@ -581,7 +541,7 @@ ask a new question about it on Stack Overflow and specify the `tensorflow` tag.
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.8.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.8.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.7.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.6.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
- + -- GitLab From 9c5c558cba9069dfedfde9431ed13227b3893bbf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 22:36:35 -0700 Subject: [PATCH 189/434] Make ClientLibraryTestBase::CreateScalarRelu return XlaComputation. PiperOrigin-RevId: 194036707 --- tensorflow/compiler/xla/tests/client_library_test_base.cc | 4 ++-- tensorflow/compiler/xla/tests/client_library_test_base.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index 31c9e21644..c09a6d71c9 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -621,8 +621,8 @@ ClientLibraryTestBase::ComputeValueAndReference( return std::make_pair(std::move(reference), std::move(result)); } -Computation ClientLibraryTestBase::CreateScalarRelu() { - ComputationBuilder builder(client_, "relu"); +XlaComputation ClientLibraryTestBase::CreateScalarRelu() { + XlaBuilder builder("relu"); auto shape = ShapeUtil::MakeShape(use_bfloat16_ ? BF16 : F32, {}); auto z_value = builder.Parameter(0, shape, "z_value"); auto zero = use_bfloat16_ diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 85ebe29ae9..c303a4562e 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -255,7 +255,7 @@ class ClientLibraryTestBase : public ::testing::Test { ErrorSpec error); // Create scalar operations for use in reductions. 
- Computation CreateScalarRelu(); + XlaComputation CreateScalarRelu(); Computation CreateScalarMax(); Computation CreateScalarReluSensitivity(); -- GitLab From d75f2bf9041c7d50c932e48a175c9d5ab0bd0075 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Mon, 23 Apr 2018 22:36:39 -0700 Subject: [PATCH 190/434] Internal change PiperOrigin-RevId: 194036710 --- .../eager/python/examples/resnet50/BUILD | 11 ++++++ .../python/examples/resnet50/resnet50_test.py | 34 ++++++++++--------- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/resnet50/BUILD b/tensorflow/contrib/eager/python/examples/resnet50/BUILD index 536cad998d..0c0e28dd95 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/BUILD +++ b/tensorflow/contrib/eager/python/examples/resnet50/BUILD @@ -14,6 +14,17 @@ py_library( ], ) +py_library( + name = "resnet50_test_lib", + srcs = ["resnet50_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":resnet50", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/eager/python:tfe", + ], +) + cuda_py_test( name = "resnet50_test", size = "large", diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index d6923293a3..09a0cd88d8 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -36,8 +36,8 @@ def device_and_data_format(): 'channels_last') -def random_batch(batch_size): - _, data_format = device_and_data_format() +def random_batch(batch_size, device_and_format=None): + _, data_format = device_and_format or device_and_data_format() shape = (3, 224, 224) if data_format == 'channels_first' else (224, 224, 3) shape = (batch_size,) + shape @@ -184,22 +184,23 @@ class ResNet50Benchmarks(tf.test.Benchmark): def _report(self, label, start, num_iters, device, batch_size, data_format): avg_time = (time.time() - start) / 
num_iters - dev = 'cpu' if 'cpu' in device else 'gpu' + dev = tf.DeviceSpec.from_string(device).device_type.lower() name = '%s_%s_batch_%d_%s' % (label, dev, batch_size, data_format) extras = {'examples_per_sec': batch_size / avg_time} self.report_benchmark( iters=num_iters, wall_time=avg_time, name=name, extras=extras) - def _force_gpu_sync(self): - # If this function is called in the context of a GPU device + def _force_device_sync(self): + # If this function is called in the context of a non-CPU device # (e.g., inside a 'with tf.device("/gpu:0")' block) - # then this will force a copy from CPU->GPU->CPU, which forces - # a sync. This is a roundabout way, yes. + # then this will force a copy from CPU->NON_CPU_DEVICE->CPU, + # which forces a sync. This is a roundabout way, yes. tf.constant(1.).cpu() - def _benchmark_eager_apply(self, label, defun=False, execution_mode=None): + def _benchmark_eager_apply(self, label, defun=False, execution_mode=None, + device_and_format=None): with tfe.execution_mode(execution_mode): - device, data_format = device_and_data_format() + device, data_format = device_and_format or device_and_data_format() model = resnet50.ResNet50(data_format) if defun: model.call = tfe.defun(model.call) @@ -207,7 +208,7 @@ class ResNet50Benchmarks(tf.test.Benchmark): num_burn = 5 num_iters = 30 with tf.device(device): - images, _ = random_batch(batch_size) + images, _ = random_batch(batch_size, device_and_format) for _ in xrange(num_burn): model(images, training=False).cpu() if execution_mode: @@ -220,7 +221,7 @@ class ResNet50Benchmarks(tf.test.Benchmark): tfe.async_wait() self._report(label, start, num_iters, device, batch_size, data_format) - def benchmark_eager_apply(self): + def benchmark_eager_apply_sync(self): self._benchmark_eager_apply('eager_apply', defun=False) def benchmark_eager_apply_async(self): @@ -234,11 +235,12 @@ class ResNet50Benchmarks(tf.test.Benchmark): label, make_iterator, defun=False, - execution_mode=None): + 
execution_mode=None, + device_and_format=None): with tfe.execution_mode(execution_mode): - device, data_format = device_and_data_format() + device, data_format = device_and_format or device_and_data_format() for batch_size in self._train_batch_sizes(): - (images, labels) = random_batch(batch_size) + (images, labels) = random_batch(batch_size, device_and_format) num_burn = 3 num_iters = 10 model = resnet50.ResNet50(data_format) @@ -253,7 +255,7 @@ class ResNet50Benchmarks(tf.test.Benchmark): train_one_step(model, images, labels, optimizer) if execution_mode: tfe.async_wait() - self._force_gpu_sync() + self._force_device_sync() gc.collect() start = time.time() @@ -262,7 +264,7 @@ class ResNet50Benchmarks(tf.test.Benchmark): train_one_step(model, images, labels, optimizer) if execution_mode: tfe.async_wait() - self._force_gpu_sync() + self._force_device_sync() self._report(label, start, num_iters, device, batch_size, data_format) def benchmark_eager_train(self): -- GitLab From 969be44f38d566b46b2d8a15958fd10db2b108fb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Apr 2018 23:18:11 -0700 Subject: [PATCH 191/434] Update ops-related pbtxt files. 
PiperOrigin-RevId: 194039856 --- .../core/ops/compat/ops_history.v1.pbtxt | 194 ++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 194 ++++++++++++++++++ 2 files changed, 388 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 247f9edf5b..05dee30ca0 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -1534,6 +1534,85 @@ op { } } } +op { + name: "ApplyAdaMax" + input_arg { + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "m" + type_attr: "T" + is_ref: true + } + input_arg { + name: "v" + type_attr: "T" + is_ref: true + } + input_arg { + name: "beta1_power" + type_attr: "T" + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "beta1" + type_attr: "T" + } + input_arg { + name: "beta2" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + output_arg { + name: "out" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} op { name: "ApplyAdadelta" input_arg { @@ -11234,6 +11313,38 @@ op { } } } +op { + name: "BroadcastTo" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "shape" + type_attr: "Tidx" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { 
name: "Bucketize" input_arg { @@ -42885,6 +42996,78 @@ op { } } } +op { + name: "ResourceApplyAdaMax" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "m" + type: DT_RESOURCE + } + input_arg { + name: "v" + type: DT_RESOURCE + } + input_arg { + name: "beta1_power" + type_attr: "T" + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "beta1" + type_attr: "T" + } + input_arg { + name: "beta2" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + is_stateful: true +} op { name: "ResourceApplyAdadelta" input_arg { @@ -66434,6 +66617,17 @@ op { } } } +op { + name: "StringStrip" + input_arg { + name: "input" + type: DT_STRING + } + output_arg { + name: "output" + type: DT_STRING + } +} op { name: "StringToHashBucket" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index d1773daebe..2edd15c446 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -684,6 +684,85 @@ op { } } } +op { + name: "ApplyAdaMax" + input_arg { + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "m" + type_attr: "T" + is_ref: true + } + input_arg { + name: "v" + type_attr: "T" + is_ref: true + } + input_arg { + name: "beta1_power" + type_attr: "T" + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "beta1" + type_attr: "T" + } + input_arg { + name: "beta2" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: 
"T" + } + input_arg { + name: "grad" + type_attr: "T" + } + output_arg { + name: "out" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} op { name: "ApplyAdadelta" input_arg { @@ -4388,6 +4467,38 @@ op { } } } +op { + name: "BroadcastTo" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "shape" + type_attr: "Tidx" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "Bucketize" input_arg { @@ -21487,6 +21598,78 @@ op { } } } +op { + name: "ResourceApplyAdaMax" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "m" + type: DT_RESOURCE + } + input_arg { + name: "v" + type: DT_RESOURCE + } + input_arg { + name: "beta1_power" + type_attr: "T" + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "beta1" + type_attr: "T" + } + input_arg { + name: "beta2" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + 
attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + is_stateful: true +} op { name: "ResourceApplyAdadelta" input_arg { @@ -30483,6 +30666,17 @@ op { } } } +op { + name: "StringStrip" + input_arg { + name: "input" + type: DT_STRING + } + output_arg { + name: "output" + type: DT_STRING + } +} op { name: "StringToHashBucket" input_arg { -- GitLab From aab0ef354b628ff4d88ab7f90b2d5bdcc440b6de Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Tue, 24 Apr 2018 00:15:19 -0700 Subject: [PATCH 192/434] Internal Change PiperOrigin-RevId: 194043623 --- .../eager/python/examples/resnet50/resnet50_test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index 09a0cd88d8..8517a3bf7b 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -169,7 +169,7 @@ class ResNet50Benchmarks(tf.test.Benchmark): def _train_batch_sizes(self): """Choose batch sizes based on GPU capability.""" for device in device_lib.list_local_devices(): - if 'GPU:0' in device.name: + if tf.DeviceSpec.from_string(device.name).device_type == 'GPU': # Avoid OOM errors with larger batch sizes, which seem to cause errors # later on even if caught. # @@ -180,6 +180,11 @@ class ResNet50Benchmarks(tf.test.Benchmark): return (16,) if 'P100' in device.physical_device_desc: return (16, 32, 64) + + if tf.DeviceSpec.from_string(device.name).device_type == 'TPU': + # TODO(iga): Training fails with batch size of 16, probably because of + # no layout optimizations with op-by-op mode. Investigate more. 
+ return (8,) return (16, 32) def _report(self, label, start, num_iters, device, batch_size, data_format): @@ -267,7 +272,7 @@ class ResNet50Benchmarks(tf.test.Benchmark): self._force_device_sync() self._report(label, start, num_iters, device, batch_size, data_format) - def benchmark_eager_train(self): + def benchmark_eager_train_sync(self): self._benchmark_eager_train('eager_train', MockIterator, defun=False) def benchmark_eager_train_async(self): -- GitLab From 8f20757e9bff4e2f2cdaf1a2e655eb7e0c17b68c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 02:00:06 -0700 Subject: [PATCH 193/434] Moving the Var class to framework so that it can be part of framework_headers_lib and accessible from contrib. PiperOrigin-RevId: 194054227 --- tensorflow/core/framework/resource_var.h | 58 ++++++++++++++++++++++++ tensorflow/core/kernels/variable_ops.h | 34 +------------- 2 files changed, 59 insertions(+), 33 deletions(-) create mode 100644 tensorflow/core/framework/resource_var.h diff --git a/tensorflow/core/framework/resource_var.h b/tensorflow/core/framework/resource_var.h new file mode 100644 index 0000000000..872b8f8b30 --- /dev/null +++ b/tensorflow/core/framework/resource_var.h @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_FRAMEWORK_RESOURCE_VAR_H_ +#define TENSORFLOW_CORE_FRAMEWORK_RESOURCE_VAR_H_ + +#include "tensorflow/core/framework/resource_mgr.h" + +namespace tensorflow { + +// Resource stored by variables in the resource manager +// (new, resource-style version). +class Var : public ResourceBase { + public: + explicit Var(DataType dtype) : tensor_(dtype) {} + // Not copyable or movable. + Var(const Var&) = delete; + Var& operator=(const Var&) = delete; + + // TODO(ebrevdo): Use LockSet instead of exposing mu. + mutex* mu() { return &mu_; } + Tensor* tensor() { return &tensor_; } + + string DebugString() override { + return strings::StrCat(DataTypeString(tensor_.dtype()), "/", + tensor_.shape().DebugString()); + } + + // Only used in the resource variable path. In resource variables, + // tensor.IsInitialized() can be true (i.e. have memory allocated to it) while + // there is not a good value there due to a race condition, and it's possible + // to stumble upon this during variable.initialized_value(). So it's best to + // just store directly whether the variable is initialized. + bool is_initialized = false; // GUARDED_BY(mu_) but annotalysis doesn't like + // it. + + private: + mutex mu_; + Tensor tensor_; + + ~Var() override {} +}; + +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_FRAMEWORK_RESOURCE_VAR_H_ diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h index 8b406e5311..f27dab4ddd 100644 --- a/tensorflow/core/kernels/variable_ops.h +++ b/tensorflow/core/kernels/variable_ops.h @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/framework/resource_var.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" @@ -27,39 +28,6 @@ limitations under the License. namespace tensorflow { -// Resource stored by variables in the resource manager -// (new, resource-style version). -class Var : public ResourceBase { - public: - explicit Var(DataType dtype) : tensor_(dtype) {} - // Not copyable or movable. - Var(const Var&) = delete; - Var& operator=(const Var&) = delete; - - // TODO(ebrevdo): Use LockSet instead of exposing mu. - mutex* mu() { return &mu_; } - Tensor* tensor() { return &tensor_; } - - string DebugString() override { - return strings::StrCat(DataTypeString(tensor_.dtype()), "/", - tensor_.shape().DebugString()); - } - - // Only used in the resource variable path. In resource variables, - // tensor.IsInitialized() can be true (i.e. have memory allocated to it) while - // there is not a good value there due to a race condition, and it's possible - // to stumble upon this during variable.initialized_value(). So it's best to - // just store directly whether the variable is initialized. - bool is_initialized = false; // GUARDED_BY(mu_) but annotalysis doesn't like - // it. - - private: - mutex mu_; - Tensor tensor_; - - ~Var() override {} -}; - class VariableOp : public OpKernel { public: explicit VariableOp(OpKernelConstruction* context); -- GitLab From 7ea8e98a9ecf5ad8c23a8df220126f6addbdf2af Mon Sep 17 00:00:00 2001 From: Sagi Date: Tue, 24 Apr 2018 17:36:49 +0800 Subject: [PATCH 194/434] Update README.md Awesome and details doc! 
But I wouldn't call it an "awkward" package path :) --- tensorflow/go/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/go/README.md b/tensorflow/go/README.md index b1bd87eb0c..e251356ec8 100644 --- a/tensorflow/go/README.md +++ b/tensorflow/go/README.md @@ -5,7 +5,7 @@ Construct and execute TensorFlow graphs in Go. [![GoDoc](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go?status.svg)](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go) > *WARNING*: The API defined in this package is not stable and can change -> without notice. The same goes for the awkward package path +> without notice. The same goes for the package path: > (`github.com/tensorflow/tensorflow/tensorflow/go`). ## Quickstart -- GitLab From e74b98ba6348d869fee50b95b7795885fdedecee Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 24 Apr 2018 04:33:16 -0700 Subject: [PATCH 195/434] Automated g4 rollback of changelist 193718607 PiperOrigin-RevId: 194068437 --- .../core/distributed_runtime/master_session.cc | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index e3022f38a2..83afc5b1a4 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -89,6 +89,10 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { ~ReffedClientGraph() override { if (should_deregister_) { DeregisterPartitions(); + } else { + for (Part& part : partitions_) { + worker_cache_->ReleaseWorker(part.name, part.worker); + } } } @@ -1174,14 +1178,8 @@ Status MasterSession::Create(GraphDef* graph_def, TF_RETURN_IF_ERROR(GraphExecutionState::MakeForBaseGraph( graph_def, execution_options, &execution_state_)); } - // TODO(b/36574172): Remove these conditions when ClusterSpec - // propagation is supported in all servers. 
- if (options.cluster_def != nullptr || - session_opts_.config.isolate_session_state()) { - should_delete_worker_sessions_ = true; - return CreateWorkerSessions(options); - } - return Status::OK(); + should_delete_worker_sessions_ = true; + return CreateWorkerSessions(options); } Status MasterSession::CreateWorkerSessions( -- GitLab From 9f38ab74161a0e8dd0b35b47f23ddeda7b286af3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 04:35:39 -0700 Subject: [PATCH 196/434] Add variants of DoBlasGemmWithAlgorithm with alpha being on device. This is in preparation of allowing XLA to fuse (A dot b) * alpha where alpha can be on device instead of just a constant. PiperOrigin-RevId: 194068597 --- tensorflow/stream_executor/blas.h | 81 ++++++++----- tensorflow/stream_executor/cuda/cuda_blas.cc | 81 ++++++++----- tensorflow/stream_executor/cuda/cuda_blas.h | 14 +-- .../stream_executor/host_or_device_scalar.h | 56 +++++++++ tensorflow/stream_executor/stream.cc | 114 +++++++++++------- tensorflow/stream_executor/stream.h | 62 +++++----- 6 files changed, 263 insertions(+), 145 deletions(-) create mode 100644 tensorflow/stream_executor/host_or_device_scalar.h diff --git a/tensorflow/stream_executor/blas.h b/tensorflow/stream_executor/blas.h index 6e62b85728..be0b0bf5fb 100644 --- a/tensorflow/stream_executor/blas.h +++ b/tensorflow/stream_executor/blas.h @@ -41,9 +41,10 @@ limitations under the License. #define TENSORFLOW_STREAM_EXECUTOR_BLAS_H_ #include -#include "tensorflow/stream_executor/platform/port.h" +#include "tensorflow/stream_executor/host_or_device_scalar.h" #include "tensorflow/stream_executor/lib/array_slice.h" +#include "tensorflow/stream_executor/platform/port.h" namespace Eigen { struct half; @@ -1032,43 +1033,49 @@ class BlasSupport { // creating a new Stream for each attempt. 
virtual bool DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, int alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, int beta, DeviceMemory *c, + uint64 n, uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, int ldc, ComputationType computation_type, AlgorithmType algorithm, ProfileResult *output_profile_result) = 0; virtual bool DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, const Eigen::half &alpha, + uint64 n, uint64 k, const HostOrDeviceScalar &alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, const Eigen::half &beta, - DeviceMemory *c, int ldc, ComputationType computation_type, - AlgorithmType algorithm, ProfileResult *output_profile_result) = 0; + const DeviceMemory &b, int ldb, + const HostOrDeviceScalar &beta, DeviceMemory *c, + int ldc, ComputationType computation_type, AlgorithmType algorithm, + ProfileResult *output_profile_result) = 0; virtual bool DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, float alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, float beta, DeviceMemory *c, + uint64 n, uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, int ldc, ComputationType computation_type, AlgorithmType algorithm, ProfileResult *output_profile_result) = 0; virtual bool DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, double alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, double beta, - DeviceMemory *c, int ldc, ComputationType computation_type, - AlgorithmType algorithm, 
ProfileResult *output_profile_result) = 0; + uint64 n, uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, + int ldc, ComputationType computation_type, AlgorithmType algorithm, + ProfileResult *output_profile_result) = 0; virtual bool DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, std::complex alpha, + uint64 n, uint64 k, const HostOrDeviceScalar> &alpha, const DeviceMemory> &a, int lda, const DeviceMemory> &b, int ldb, - std::complex beta, DeviceMemory> *c, int ldc, + const HostOrDeviceScalar> &beta, + DeviceMemory> *c, int ldc, ComputationType computation_type, AlgorithmType algorithm, ProfileResult *output_profile_result) = 0; virtual bool DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, std::complex alpha, + uint64 n, uint64 k, const HostOrDeviceScalar> &alpha, const DeviceMemory> &a, int lda, const DeviceMemory> &b, int ldb, - std::complex beta, DeviceMemory> *c, int ldc, + const HostOrDeviceScalar> &beta, + DeviceMemory> *c, int ldc, ComputationType computation_type, AlgorithmType algorithm, ProfileResult *output_profile_result) = 0; @@ -1886,49 +1893,57 @@ class BlasSupport { override; \ bool DoBlasGemmWithAlgorithm( \ Stream *stream, blas::Transpose transa, blas::Transpose transb, \ - uint64 m, uint64 n, uint64 k, int alpha, const DeviceMemory &a, \ - int lda, const DeviceMemory &b, int ldb, int beta, \ - DeviceMemory *c, int ldc, blas::ComputationType computation_type, \ + uint64 m, uint64 n, uint64 k, const HostOrDeviceScalar &alpha, \ + const DeviceMemory &a, int lda, const DeviceMemory &b, \ + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, \ + int ldc, blas::ComputationType computation_type, \ blas::AlgorithmType algorithm, \ blas::ProfileResult *output_profile_result) override; \ bool 
DoBlasGemmWithAlgorithm( \ Stream *stream, blas::Transpose transa, blas::Transpose transb, \ - uint64 m, uint64 n, uint64 k, const Eigen::half &alpha, \ + uint64 m, uint64 n, uint64 k, \ + const HostOrDeviceScalar &alpha, \ const DeviceMemory &a, int lda, \ - const DeviceMemory &b, int ldb, const Eigen::half &beta, \ + const DeviceMemory &b, int ldb, \ + const HostOrDeviceScalar &beta, \ DeviceMemory *c, int ldc, \ blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ blas::ProfileResult *output_profile_result) override; \ bool DoBlasGemmWithAlgorithm( \ Stream *stream, blas::Transpose transa, blas::Transpose transb, \ - uint64 m, uint64 n, uint64 k, float alpha, const DeviceMemory &a, \ - int lda, const DeviceMemory &b, int ldb, float beta, \ - DeviceMemory *c, int ldc, blas::ComputationType computation_type, \ + uint64 m, uint64 n, uint64 k, const HostOrDeviceScalar &alpha, \ + const DeviceMemory &a, int lda, const DeviceMemory &b, \ + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, \ + int ldc, blas::ComputationType computation_type, \ blas::AlgorithmType algorithm, \ blas::ProfileResult *output_profile_result) override; \ bool DoBlasGemmWithAlgorithm( \ Stream *stream, blas::Transpose transa, blas::Transpose transb, \ - uint64 m, uint64 n, uint64 k, double alpha, \ + uint64 m, uint64 n, uint64 k, const HostOrDeviceScalar &alpha, \ const DeviceMemory &a, int lda, const DeviceMemory &b, \ - int ldb, double beta, DeviceMemory *c, int ldc, \ + int ldb, const HostOrDeviceScalar &beta, \ + DeviceMemory *c, int ldc, \ blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ blas::ProfileResult *output_profile_result) override; \ bool DoBlasGemmWithAlgorithm( \ Stream *stream, blas::Transpose transa, blas::Transpose transb, \ - uint64 m, uint64 n, uint64 k, std::complex alpha, \ + uint64 m, uint64 n, uint64 k, \ + const HostOrDeviceScalar> &alpha, \ const DeviceMemory> &a, int lda, \ const DeviceMemory> &b, int ldb, \ - 
std::complex beta, DeviceMemory> *c, int ldc, \ + const HostOrDeviceScalar> &beta, \ + DeviceMemory> *c, int ldc, \ blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ blas::ProfileResult *output_profile_result) override; \ bool DoBlasGemmWithAlgorithm( \ Stream *stream, blas::Transpose transa, blas::Transpose transb, \ - uint64 m, uint64 n, uint64 k, std::complex alpha, \ + uint64 m, uint64 n, uint64 k, \ + const HostOrDeviceScalar> &alpha, \ const DeviceMemory> &a, int lda, \ const DeviceMemory> &b, int ldb, \ - std::complex beta, DeviceMemory> *c, \ - int ldc, blas::ComputationType computation_type, \ - blas::AlgorithmType algorithm, \ + const HostOrDeviceScalar> &beta, \ + DeviceMemory> *c, int ldc, \ + blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ blas::ProfileResult *output_profile_result) override; \ bool DoBlasGemmBatched( \ Stream *stream, blas::Transpose transa, blas::Transpose transb, \ diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc index 007c0f1c86..3c1353aee3 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.cc +++ b/tensorflow/stream_executor/cuda/cuda_blas.cc @@ -2156,10 +2156,11 @@ static bool TensorOpsAvailable(int cc_major) { template bool CUDABlas::DoBlasGemmWithAlgorithmImpl( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, const CompT &alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, const CompT &beta, - DeviceMemory *c, int ldc, blas::ComputationType computation_type, - blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { + uint64 n, uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, int ldb, + const HostOrDeviceScalar &beta, DeviceMemory *c, int ldc, + blas::ComputationType computation_type, blas::AlgorithmType algorithm, + blas::ProfileResult *output_profile_result) { // CUDA < version 8 and 
GPUs < sm_50 don't support cublasGemmEx. #if CUDA_VERSION < 8000 return false; @@ -2175,6 +2176,12 @@ bool CUDABlas::DoBlasGemmWithAlgorithmImpl( return false; } + // Either both 'alpha' and 'beta' need to be pointers to device memory, or + // they need to be both host scalars. + if (alpha.is_pointer() != beta.is_pointer()) { + return false; + } + std::unique_ptr timer; if (output_profile_result != nullptr) { timer.reset(new CUDATimer(parent_)); @@ -2187,10 +2194,15 @@ bool CUDABlas::DoBlasGemmWithAlgorithmImpl( // Since we are converting 'algorithm' to cublasGemmAlgo_t by static_cast, // we do the following compile-time check on the default value: static_assert(blas::kDefaultGemmAlgo == CUBLAS_GEMM_DFALT, ""); + // If 'alpha' and 'beta' are host scalars and CompT is Eigen::half, we + // essentially reinterpret_cast to __half, which is safe because Eigen::half + // inherits from __half. bool result = DoBlasInternalFailureOK( - wrap::cublasGemmEx, stream, /* pointer_mode_host = */ true, - CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, &alpha, - CUDAMemory(a), cuda_in_type, lda, CUDAMemory(b), cuda_in_type, ldb, &beta, + wrap::cublasGemmEx, stream, /* pointer_mode_host = */ !alpha.is_pointer(), + CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, + alpha.is_pointer() ? CUDAMemory(alpha.pointer()) : &alpha.value(), + CUDAMemory(a), cuda_in_type, lda, CUDAMemory(b), cuda_in_type, ldb, + beta.is_pointer() ? 
CUDAMemory(beta.pointer()) : &beta.value(), CUDAMemoryMutable(c), CUDADataType::type, ldc, CUDAComputationType(computation_type), static_cast(algorithm)); @@ -2239,10 +2251,11 @@ bool CUDABlas::GetBlasGemmAlgorithms( bool CUDABlas::DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, int alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, int beta, DeviceMemory *c, - int ldc, blas::ComputationType computation_type, - blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { + uint64 n, uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, int ldb, + const HostOrDeviceScalar &beta, DeviceMemory *c, int ldc, + blas::ComputationType computation_type, blas::AlgorithmType algorithm, + blas::ProfileResult *output_profile_result) { return DoBlasGemmWithAlgorithmImpl( stream, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, computation_type, algorithm, output_profile_result); @@ -2250,17 +2263,25 @@ bool CUDABlas::DoBlasGemmWithAlgorithm( bool CUDABlas::DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, const Eigen::half &alpha, + uint64 n, uint64 k, const HostOrDeviceScalar &alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, const Eigen::half &beta, - DeviceMemory *c, int ldc, - blas::ComputationType computation_type, blas::AlgorithmType algorithm, - blas::ProfileResult *output_profile_result) { + const DeviceMemory &b, int ldb, + const HostOrDeviceScalar &beta, DeviceMemory *c, + int ldc, blas::ComputationType computation_type, + blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { if (computation_type == blas::ComputationType::kF32) { + if (alpha.is_pointer() || beta.is_pointer()) { + // We cannot easily convert a pointer to f16 memory to a pointer to f32 + // memory from here, so we don't 
support this for now. + // TODO(akuegel): Investigate whether we can do the conversion before + // calling DoBlasGemmWithAlgorithm. + return false; + } + HostOrDeviceScalar float_alpha(static_cast(alpha.value())); + HostOrDeviceScalar float_beta(static_cast(beta.value())); return DoBlasGemmWithAlgorithmImpl( - stream, transa, transb, m, n, k, static_cast(alpha), a, lda, b, - ldb, static_cast(beta), c, ldc, computation_type, algorithm, - output_profile_result); + stream, transa, transb, m, n, k, float_alpha, a, lda, b, ldb, + float_beta, c, ldc, computation_type, algorithm, output_profile_result); } CHECK_EQ(computation_type, blas::ComputationType::kF16); @@ -2271,8 +2292,9 @@ bool CUDABlas::DoBlasGemmWithAlgorithm( bool CUDABlas::DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, float alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, float beta, DeviceMemory *c, + uint64 n, uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { return DoBlasGemmWithAlgorithmImpl( @@ -2282,9 +2304,10 @@ bool CUDABlas::DoBlasGemmWithAlgorithm( bool CUDABlas::DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, double alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, double beta, - DeviceMemory *c, int ldc, blas::ComputationType computation_type, + uint64 n, uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, + int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { return DoBlasGemmWithAlgorithmImpl( 
stream, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, @@ -2293,10 +2316,11 @@ bool CUDABlas::DoBlasGemmWithAlgorithm( bool CUDABlas::DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, std::complex alpha, + uint64 n, uint64 k, const HostOrDeviceScalar> &alpha, const DeviceMemory> &a, int lda, const DeviceMemory> &b, int ldb, - std::complex beta, DeviceMemory> *c, int ldc, + const HostOrDeviceScalar> &beta, + DeviceMemory> *c, int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { return DoBlasGemmWithAlgorithmImpl( @@ -2306,10 +2330,11 @@ bool CUDABlas::DoBlasGemmWithAlgorithm( bool CUDABlas::DoBlasGemmWithAlgorithm( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, std::complex alpha, + uint64 n, uint64 k, const HostOrDeviceScalar> &alpha, const DeviceMemory> &a, int lda, const DeviceMemory> &b, int ldb, - std::complex beta, DeviceMemory> *c, int ldc, + const HostOrDeviceScalar> &beta, + DeviceMemory> *c, int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { return DoBlasGemmWithAlgorithmImpl( diff --git a/tensorflow/stream_executor/cuda/cuda_blas.h b/tensorflow/stream_executor/cuda/cuda_blas.h index 55c414a1f9..12dc5e47fd 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.h +++ b/tensorflow/stream_executor/cuda/cuda_blas.h @@ -21,6 +21,7 @@ limitations under the License. 
#define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_BLAS_H_ #include "tensorflow/stream_executor/blas.h" +#include "tensorflow/stream_executor/host_or_device_scalar.h" #include "tensorflow/stream_executor/lib/stringpiece.h" #include "tensorflow/stream_executor/platform/mutex.h" #include "tensorflow/stream_executor/platform/port.h" @@ -116,18 +117,13 @@ class CUDABlas : public blas::BlasSupport { int batch_count, ScratchAllocator *scratch_allocator); // Helper function for implementing DoBlasGemmWithAlgorithm. - // - // We take alpha and beta by const reference because T might be Eigen::half, - // and we want to avoid pulling in a dependency on Eigen. When we pass the - // references to cublas, we essentially reinterpret_cast to __half, which is - // safe because Eigen::half inherits from __half. template bool DoBlasGemmWithAlgorithmImpl( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, - uint64 n, uint64 k, const CompT &alpha, const DeviceMemory &a, - int lda, const DeviceMemory &b, int ldb, const CompT &beta, - DeviceMemory *c, int ldc, blas::ComputationType computation_type, - blas::AlgorithmType algorithm, + uint64 n, uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, int ldb, + const HostOrDeviceScalar &beta, DeviceMemory *c, int ldc, + blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result); // Helper function for implementing DoBlasGemmWithProfiling. diff --git a/tensorflow/stream_executor/host_or_device_scalar.h b/tensorflow/stream_executor/host_or_device_scalar.h new file mode 100644 index 0000000000..c9e3e14778 --- /dev/null +++ b/tensorflow/stream_executor/host_or_device_scalar.h @@ -0,0 +1,56 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_HOST_OR_DEVICE_SCALAR_H_ +#define TENSORFLOW_STREAM_EXECUTOR_HOST_OR_DEVICE_SCALAR_H_ + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/stream_executor/device_memory.h" + +namespace stream_executor { + +// Allows to represent a value that is either a host scalar or a scalar stored +// on the GPU device. +template +class HostOrDeviceScalar { + public: + // Not marked as explicit because when using this constructor, we usually want + // to set this to a compile-time constant. + HostOrDeviceScalar(ElemT value) : value_(value), is_pointer_(false) {} + explicit HostOrDeviceScalar(const DeviceMemory& pointer) + : pointer_(pointer), is_pointer_(true) { + CHECK_EQ(1, pointer.ElementCount()); + } + + bool is_pointer() const { return is_pointer_; } + const DeviceMemory& pointer() const { + CHECK(is_pointer()); + return pointer_; + } + const ElemT& value() const { + CHECK(!is_pointer()); + return value_; + } + + private: + union { + ElemT value_; + DeviceMemory pointer_; + }; + bool is_pointer_; +}; + +} // namespace stream_executor +#endif // TENSORFLOW_STREAM_EXECUTOR_HOST_OR_DEVICE_SCALAR_H_ diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index f59d9a13ac..093f0c9306 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "third_party/eigen3/Eigen/Core" #include "tensorflow/stream_executor/blas.h" #include "tensorflow/stream_executor/host_buffer.h" +#include "tensorflow/stream_executor/host_or_device_scalar.h" #include "tensorflow/stream_executor/lib/stacktrace.h" #include "tensorflow/stream_executor/lib/strcat.h" #include "tensorflow/stream_executor/platform.h" @@ -133,6 +134,14 @@ string ToVlogString(float f) { return port::StrCat(f); } string ToVlogString(double d) { return port::StrCat(d); } +template +string ToVlogString(const HostOrDeviceScalar &memory_or_constant) { + if (memory_or_constant.is_pointer()) { + return ToVlogString(memory_or_constant.pointer()); + } + return ToVlogString(memory_or_constant.value()); +} + template string ToVlogString(port::ArraySlice elements) { string str = port::StrCat( @@ -3882,22 +3891,23 @@ Stream &Stream::ThenBlasGemmWithProfiling( Stream &Stream::ThenBlasGemmWithAlgorithm( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, - uint64 k, const Eigen::half &alpha, const DeviceMemory &a, - int lda, const DeviceMemory &b, int ldb, - const Eigen::half &beta, DeviceMemory *c, int ldc, - blas::ComputationType computation_type, blas::AlgorithmType algorithm, - blas::ProfileResult *output_profile_result) { + uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &b, int ldb, + const HostOrDeviceScalar &beta, DeviceMemory *c, + int ldc, blas::ComputationType computation_type, + blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), PARAM(ldc), PARAM(computation_type), PARAM(algorithm)); - ThenBlasWithProfileImpl &, int, - const DeviceMemory &, int, - const Eigen::half &, DeviceMemory *, int, - blas::ComputationType, blas::AlgorithmType> + ThenBlasWithProfileImpl< + blas::Transpose, blas::Transpose, uint64, uint64, 
uint64, + const HostOrDeviceScalar &, + const DeviceMemory &, int, const DeviceMemory &, + int, const HostOrDeviceScalar &, DeviceMemory *, + int, blas::ComputationType, blas::AlgorithmType> impl; return impl(this, &blas::BlasSupport::DoBlasGemmWithAlgorithm, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, computation_type, @@ -3906,18 +3916,20 @@ Stream &Stream::ThenBlasGemmWithAlgorithm( Stream &Stream::ThenBlasGemmWithAlgorithm( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, - uint64 k, int alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, int beta, DeviceMemory *c, - int ldc, blas::ComputationType computation_type, - blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { + uint64 k, const HostOrDeviceScalar &alpha, const DeviceMemory &a, + int lda, const DeviceMemory &b, int ldb, + const HostOrDeviceScalar &beta, DeviceMemory *c, int ldc, + blas::ComputationType computation_type, blas::AlgorithmType algorithm, + blas::ProfileResult *output_profile_result) { VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), PARAM(ldc), PARAM(computation_type), PARAM(algorithm)); ThenBlasWithProfileImpl< - blas::Transpose, blas::Transpose, uint64, uint64, uint64, int, - const DeviceMemory &, int, const DeviceMemory &, int, int, + blas::Transpose, blas::Transpose, uint64, uint64, uint64, + const HostOrDeviceScalar &, const DeviceMemory &, int, + const DeviceMemory &, int, const HostOrDeviceScalar &, DeviceMemory *, int, blas::ComputationType, blas::AlgorithmType> impl; return impl(this, &blas::BlasSupport::DoBlasGemmWithAlgorithm, transa, transb, @@ -3927,8 +3939,9 @@ Stream &Stream::ThenBlasGemmWithAlgorithm( Stream &Stream::ThenBlasGemmWithAlgorithm( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, - uint64 k, float alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int 
ldb, float beta, DeviceMemory *c, + uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), @@ -3937,8 +3950,9 @@ Stream &Stream::ThenBlasGemmWithAlgorithm( PARAM(algorithm)); ThenBlasWithProfileImpl< - blas::Transpose, blas::Transpose, uint64, uint64, uint64, float, - const DeviceMemory &, int, const DeviceMemory &, int, float, + blas::Transpose, blas::Transpose, uint64, uint64, uint64, + const HostOrDeviceScalar &, const DeviceMemory &, int, + const DeviceMemory &, int, const HostOrDeviceScalar &, DeviceMemory *, int, blas::ComputationType, blas::AlgorithmType> impl; return impl(this, &blas::BlasSupport::DoBlasGemmWithAlgorithm, transa, transb, @@ -3948,32 +3962,35 @@ Stream &Stream::ThenBlasGemmWithAlgorithm( Stream &Stream::ThenBlasGemmWithAlgorithm( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, - uint64 k, double alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, double beta, - DeviceMemory *c, int ldc, blas::ComputationType computation_type, + uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, + int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), PARAM(ldc), PARAM(computation_type), PARAM(algorithm)); - ThenBlasWithProfileImpl &, int, - const DeviceMemory &, int, double, - DeviceMemory *, int, blas::ComputationType, - blas::AlgorithmType> + ThenBlasWithProfileImpl< + blas::Transpose, 
blas::Transpose, uint64, uint64, uint64, + const HostOrDeviceScalar &, const DeviceMemory &, int, + const DeviceMemory &, int, const HostOrDeviceScalar &, + DeviceMemory *, int, blas::ComputationType, blas::AlgorithmType> impl; return impl(this, &blas::BlasSupport::DoBlasGemmWithAlgorithm, transa, transb, - m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, computation_type, + m, n, k, HostOrDeviceScalar(alpha), a, lda, b, ldb, + HostOrDeviceScalar(beta), c, ldc, computation_type, algorithm, output_profile_result); } Stream &Stream::ThenBlasGemmWithAlgorithm( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, - uint64 k, std::complex alpha, + uint64 k, const HostOrDeviceScalar> &alpha, const DeviceMemory> &a, int lda, const DeviceMemory> &b, int ldb, - std::complex beta, DeviceMemory> *c, int ldc, + const HostOrDeviceScalar> &beta, + DeviceMemory> *c, int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), @@ -3981,12 +3998,14 @@ Stream &Stream::ThenBlasGemmWithAlgorithm( PARAM(beta), PARAM(c), PARAM(ldc), PARAM(computation_type), PARAM(algorithm)); - ThenBlasWithProfileImpl< - blas::Transpose, blas::Transpose, uint64, uint64, uint64, - std::complex, const DeviceMemory> &, int, - const DeviceMemory> &, int, std::complex, - DeviceMemory> *, int, blas::ComputationType, - blas::AlgorithmType> + ThenBlasWithProfileImpl> &, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + const HostOrDeviceScalar> &, + DeviceMemory> *, int, + blas::ComputationType, blas::AlgorithmType> impl; return impl(this, &blas::BlasSupport::DoBlasGemmWithAlgorithm, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, computation_type, @@ -3995,10 +4014,11 @@ Stream &Stream::ThenBlasGemmWithAlgorithm( Stream &Stream::ThenBlasGemmWithAlgorithm( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, - uint64 k, 
std::complex alpha, + uint64 k, const HostOrDeviceScalar> &alpha, const DeviceMemory> &a, int lda, const DeviceMemory> &b, int ldb, - std::complex beta, DeviceMemory> *c, int ldc, + const HostOrDeviceScalar> &beta, + DeviceMemory> *c, int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), @@ -4006,12 +4026,14 @@ Stream &Stream::ThenBlasGemmWithAlgorithm( PARAM(beta), PARAM(c), PARAM(ldc), PARAM(computation_type), PARAM(algorithm)); - ThenBlasWithProfileImpl< - blas::Transpose, blas::Transpose, uint64, uint64, uint64, - std::complex, const DeviceMemory> &, int, - const DeviceMemory> &, int, std::complex, - DeviceMemory> *, int, blas::ComputationType, - blas::AlgorithmType> + ThenBlasWithProfileImpl> &, + const DeviceMemory> &, int, + const DeviceMemory> &, int, + const HostOrDeviceScalar> &, + DeviceMemory> *, int, + blas::ComputationType, blas::AlgorithmType> impl; return impl(this, &blas::BlasSupport::DoBlasGemmWithAlgorithm, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, computation_type, diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index d4a81440e9..3d1b011c57 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/stream_executor/dnn.h" #include "tensorflow/stream_executor/event.h" #include "tensorflow/stream_executor/fft.h" +#include "tensorflow/stream_executor/host_or_device_scalar.h" #include "tensorflow/stream_executor/kernel.h" #include "tensorflow/stream_executor/launch_dim.h" #include "tensorflow/stream_executor/lib/array_slice.h" @@ -1422,50 +1423,53 @@ class Stream { // See BlasSupport::DoBlasGemmWithAlgorithm. 
Stream &ThenBlasGemmWithAlgorithm( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, - uint64 k, const Eigen::half &alpha, const DeviceMemory &a, - int lda, const DeviceMemory &b, int ldb, - const Eigen::half &beta, DeviceMemory *c, int ldc, - blas::ComputationType computation_type, blas::AlgorithmType algorithm, + uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, + const DeviceMemory &b, int ldb, + const HostOrDeviceScalar &beta, DeviceMemory *c, + int ldc, blas::ComputationType computation_type, + blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result); - Stream &ThenBlasGemmWithAlgorithm(blas::Transpose transa, - blas::Transpose transb, uint64 m, uint64 n, - uint64 k, int alpha, - const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, - int beta, DeviceMemory *c, int ldc, - blas::ComputationType computation_type, - blas::AlgorithmType algorithm, - blas::ProfileResult *output_profile_result); - Stream &ThenBlasGemmWithAlgorithm(blas::Transpose transa, - blas::Transpose transb, uint64 m, uint64 n, - uint64 k, float alpha, - const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, - float beta, DeviceMemory *c, int ldc, - blas::ComputationType computation_type, - blas::AlgorithmType algorithm, - blas::ProfileResult *output_profile_result); Stream &ThenBlasGemmWithAlgorithm( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, - uint64 k, double alpha, const DeviceMemory &a, int lda, - const DeviceMemory &b, int ldb, double beta, - DeviceMemory *c, int ldc, blas::ComputationType computation_type, + uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, + int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result); Stream &ThenBlasGemmWithAlgorithm( blas::Transpose transa, blas::Transpose transb, uint64 
m, uint64 n, - uint64 k, std::complex alpha, + uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, + int ldc, blas::ComputationType computation_type, + blas::AlgorithmType algorithm, + blas::ProfileResult *output_profile_result); + Stream &ThenBlasGemmWithAlgorithm( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, const HostOrDeviceScalar &alpha, + const DeviceMemory &a, int lda, const DeviceMemory &b, + int ldb, const HostOrDeviceScalar &beta, DeviceMemory *c, + int ldc, blas::ComputationType computation_type, + blas::AlgorithmType algorithm, + blas::ProfileResult *output_profile_result); + Stream &ThenBlasGemmWithAlgorithm( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, const HostOrDeviceScalar> &alpha, const DeviceMemory> &a, int lda, const DeviceMemory> &b, int ldb, - std::complex beta, DeviceMemory> *c, int ldc, + const HostOrDeviceScalar> &beta, + DeviceMemory> *c, int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result); Stream &ThenBlasGemmWithAlgorithm( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, - uint64 k, std::complex alpha, + uint64 k, const HostOrDeviceScalar> &alpha, const DeviceMemory> &a, int lda, const DeviceMemory> &b, int ldb, - std::complex beta, DeviceMemory> *c, int ldc, + const HostOrDeviceScalar> &beta, + DeviceMemory> *c, int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result); -- GitLab From f62c472c470aee64147df58de584f0b8450b29ad Mon Sep 17 00:00:00 2001 From: Ian Langmore Date: Tue, 24 Apr 2018 06:08:14 -0700 Subject: [PATCH 197/434] Move LinearOperatorCirculant to third_party. 
PiperOrigin-RevId: 194075622 --- tensorflow/contrib/linalg/__init__.py | 4 + tensorflow/python/kernel_tests/linalg/BUILD | 20 + .../linalg/linear_operator_circulant_test.py | 700 +++++++++++ tensorflow/python/ops/linalg/linalg.py | 1 + .../ops/linalg/linear_operator_circulant.py | 1074 +++++++++++++++++ ...ear-operator-circulant.__metaclass__.pbtxt | 14 + ...ow.linalg.-linear-operator-circulant.pbtxt | 155 +++ ...-operator-circulant2-d.__metaclass__.pbtxt | 14 + ...linalg.-linear-operator-circulant2-d.pbtxt | 155 +++ ...-operator-circulant3-d.__metaclass__.pbtxt | 14 + ...linalg.-linear-operator-circulant3-d.pbtxt | 155 +++ .../tools/api/golden/tensorflow.linalg.pbtxt | 12 + 12 files changed, 2318 insertions(+) create mode 100644 tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py create mode 100644 tensorflow/python/ops/linalg/linear_operator_circulant.py create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant2-d.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant2-d.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant3-d.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant3-d.pbtxt diff --git a/tensorflow/contrib/linalg/__init__.py b/tensorflow/contrib/linalg/__init__.py index 38bd66b13f..554854da84 100644 --- a/tensorflow/contrib/linalg/__init__.py +++ b/tensorflow/contrib/linalg/__init__.py @@ -18,6 +18,9 @@ See the @{$python/contrib.linalg} guide. 
@@LinearOperator @@LinearOperatorBlockDiag +@@LinearOperatorCirculant +@@LinearOperatorCirculant2D +@@LinearOperatorCirculant3D @@LinearOperatorDiag @@LinearOperatorIdentity @@LinearOperatorScaledIdentity @@ -39,6 +42,7 @@ from tensorflow.contrib.linalg.python.ops.linear_operator_addition import * from tensorflow.contrib.linalg.python.ops.linear_operator_block_diag import * from tensorflow.contrib.linalg.python.ops.linear_operator_kronecker import * from tensorflow.python.ops.linalg.linear_operator import * +from tensorflow.python.ops.linalg.linear_operator_circulant import * from tensorflow.python.ops.linalg.linear_operator_composition import * from tensorflow.python.ops.linalg.linear_operator_diag import * from tensorflow.python.ops.linalg.linear_operator_full_matrix import * diff --git a/tensorflow/python/kernel_tests/linalg/BUILD b/tensorflow/python/kernel_tests/linalg/BUILD index 7ffa48b653..faeccc8fba 100644 --- a/tensorflow/python/kernel_tests/linalg/BUILD +++ b/tensorflow/python/kernel_tests/linalg/BUILD @@ -43,6 +43,26 @@ cuda_py_test( tags = ["noasan"], # times out b/63678675 ) +cuda_py_test( + name = "linear_operator_circulant_test", + size = "medium", + srcs = ["linear_operator_circulant_test.py"], + additional_deps = [ + "//tensorflow/python/ops/linalg", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:spectral_ops_test_util", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], + shard_count = 5, + tags = ["noasan"], # times out b/63678675 +) + cuda_py_test( name = "linear_operator_diag_test", size = "medium", diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py new file mode 100644 index 0000000000..e7f2f1c12b 
--- /dev/null +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py @@ -0,0 +1,700 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import contextlib + +import numpy as np + +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import spectral_ops_test_util +from tensorflow.python.ops.linalg import linalg +from tensorflow.python.ops.linalg import linear_operator_circulant +from tensorflow.python.ops.linalg import linear_operator_test_util +from tensorflow.python.platform import test + +rng = np.random.RandomState(0) +_to_complex = linear_operator_circulant._to_complex + + +class LinearOperatorCirculantBaseTest(object): + """Common class for circulant tests.""" + + @contextlib.contextmanager + def test_session(self, *args, **kwargs): + with test.TestCase.test_session(self, *args, **kwargs) as sess: + with spectral_ops_test_util.fft_kernel_label_map(): + yield sess + + def _shape_to_spectrum_shape(self, shape): + # If spectrum.shape = batch_shape + [N], + # this creates an operator of shape batch_shape + [N, N] + return shape[:-1] + + def _spectrum_to_circulant_1d(self, spectrum, 
shape, dtype): + """Creates a circulant matrix from a spectrum. + + Intentionally done in an explicit yet inefficient way. This provides a + cross check to the main code that uses fancy reshapes. + + Args: + spectrum: Float or complex `Tensor`. + shape: Python list. Desired shape of returned matrix. + dtype: Type to cast the returned matrix to. + + Returns: + Circulant (batch) matrix of desired `dtype`. + """ + spectrum = _to_complex(spectrum) + spectrum_shape = self._shape_to_spectrum_shape(shape) + domain_dimension = spectrum_shape[-1] + if not domain_dimension: + return array_ops.zeros(shape, dtype) + + # Explicitly compute the action of spectrum on basis vectors. + matrix_rows = [] + for m in range(domain_dimension): + x = np.zeros([domain_dimension]) + # x is a basis vector. + x[m] = 1.0 + fft_x = math_ops.fft(x) + h_convolve_x = math_ops.ifft(spectrum * fft_x) + matrix_rows.append(h_convolve_x) + matrix = array_ops.stack(matrix_rows, axis=-1) + return math_ops.cast(matrix, dtype) + + +class LinearOperatorCirculantTestSelfAdjointOperator( + LinearOperatorCirculantBaseTest, + linear_operator_test_util.SquareLinearOperatorDerivedClassTest): + """Test of LinearOperatorCirculant when operator is self-adjoint. + + Real spectrum <==> Self adjoint operator. + Note that when the spectrum is real, the operator may still be complex. + """ + + @property + def _dtypes_to_test(self): + # This operator will always be complex because, although the spectrum is + # real, the matrix will not be real. + return [dtypes.complex64] + + def _operator_and_mat_and_feed_dict(self, build_info, dtype, use_placeholder): + shape = build_info.shape + # For this test class, we are creating real spectrums. + # We also want the spectrum to have eigenvalues bounded away from zero. + # + # spectrum is bounded away from zero. + spectrum = linear_operator_test_util.random_sign_uniform( + shape=self._shape_to_spectrum_shape(shape), minval=1., maxval=2.) 
+ # If dtype is complex, cast spectrum to complex. The imaginary part will be + # zero, so the operator will still be self-adjoint. + spectrum = math_ops.cast(spectrum, dtype) + + if use_placeholder: + spectrum_ph = array_ops.placeholder(dtypes.complex64) + # Evaluate here because (i) you cannot feed a tensor, and (ii) + # it is random and we want the same value used for both mat and feed_dict. + spectrum = spectrum.eval() + operator = linalg.LinearOperatorCirculant( + spectrum_ph, is_self_adjoint=True, input_output_dtype=dtype) + feed_dict = {spectrum_ph: spectrum} + else: + operator = linalg.LinearOperatorCirculant( + spectrum, is_self_adjoint=True, input_output_dtype=dtype) + feed_dict = None + + mat = self._spectrum_to_circulant_1d(spectrum, shape, dtype=dtype) + + return operator, mat, feed_dict + + def test_simple_hermitian_spectrum_gives_operator_with_zero_imag_part(self): + with self.test_session(): + spectrum = math_ops.cast([1., 1j, -1j], dtypes.complex64) + operator = linalg.LinearOperatorCirculant( + spectrum, input_output_dtype=dtypes.complex64) + matrix = operator.to_dense() + imag_matrix = math_ops.imag(matrix) + eps = np.finfo(np.float32).eps + np.testing.assert_allclose(0, imag_matrix.eval(), rtol=0, atol=eps * 3) + + +class LinearOperatorCirculantTestHermitianSpectrum( + LinearOperatorCirculantBaseTest, + linear_operator_test_util.SquareLinearOperatorDerivedClassTest): + """Test of LinearOperatorCirculant when the spectrum is Hermitian. + + Hermitian spectrum <==> Real valued operator. We test both real and complex + dtypes here though. So in some cases the matrix will be complex but with + zero imaginary part. + """ + + @property + def _dtypes_to_test(self): + return [dtypes.float32, dtypes.complex64] + + def _operator_and_mat_and_feed_dict(self, build_info, dtype, use_placeholder): + shape = build_info.shape + # For this test class, we are creating Hermitian spectrums. + # We also want the spectrum to have eigenvalues bounded away from zero. 
+ # + # pre_spectrum is bounded away from zero. + pre_spectrum = linear_operator_test_util.random_uniform( + shape=self._shape_to_spectrum_shape(shape), minval=1., maxval=2.) + pre_spectrum_c = _to_complex(pre_spectrum) + + # Real{IFFT[pre_spectrum]} + # = IFFT[EvenPartOf[pre_spectrum]] + # is the IFFT of something that is also bounded away from zero. + # Therefore, FFT[pre_h] would be a well-conditioned spectrum. + pre_h = math_ops.ifft(pre_spectrum_c) + + # A spectrum is Hermitian iff it is the DFT of a real convolution kernel. + # So we will make spectrum = FFT[h], for real valued h. + h = math_ops.real(pre_h) + h_c = _to_complex(h) + + spectrum = math_ops.fft(h_c) + + if use_placeholder: + spectrum_ph = array_ops.placeholder(dtypes.complex64) + # Evaluate here because (i) you cannot feed a tensor, and (ii) + # it is random and we want the same value used for both mat and feed_dict. + spectrum = spectrum.eval() + operator = linalg.LinearOperatorCirculant( + spectrum_ph, input_output_dtype=dtype) + feed_dict = {spectrum_ph: spectrum} + else: + operator = linalg.LinearOperatorCirculant( + spectrum, input_output_dtype=dtype) + feed_dict = None + + mat = self._spectrum_to_circulant_1d(spectrum, shape, dtype=dtype) + + return operator, mat, feed_dict + + def test_simple_hermitian_spectrum_gives_operator_with_zero_imag_part(self): + with self.test_session(): + spectrum = math_ops.cast([1., 1j, -1j], dtypes.complex64) + operator = linalg.LinearOperatorCirculant( + spectrum, input_output_dtype=dtypes.complex64) + matrix = operator.to_dense() + imag_matrix = math_ops.imag(matrix) + eps = np.finfo(np.float32).eps + np.testing.assert_allclose(0, imag_matrix.eval(), rtol=0, atol=eps * 3) + + +class LinearOperatorCirculantTestNonHermitianSpectrum( + LinearOperatorCirculantBaseTest, + linear_operator_test_util.SquareLinearOperatorDerivedClassTest): + """Test of LinearOperatorCirculant when the spectrum is not Hermitian. + + Non-Hermitian spectrum <==> Complex valued operator. 
+ We test only complex dtypes here. + """ + + @property + def _dtypes_to_test(self): + return [dtypes.complex64] + + def _operator_and_mat_and_feed_dict(self, build_info, dtype, use_placeholder): + shape = build_info.shape + # Will be well conditioned enough to get accurate solves. + spectrum = linear_operator_test_util.random_sign_uniform( + shape=self._shape_to_spectrum_shape(shape), + dtype=dtypes.complex64, + minval=1., + maxval=2.) + + if use_placeholder: + spectrum_ph = array_ops.placeholder(dtypes.complex64) + # Evaluate here because (i) you cannot feed a tensor, and (ii) + # it is random and we want the same value used for both mat and feed_dict. + spectrum = spectrum.eval() + operator = linalg.LinearOperatorCirculant( + spectrum_ph, input_output_dtype=dtype) + feed_dict = {spectrum_ph: spectrum} + else: + operator = linalg.LinearOperatorCirculant( + spectrum, input_output_dtype=dtype) + feed_dict = None + + mat = self._spectrum_to_circulant_1d(spectrum, shape, dtype=dtype) + + return operator, mat, feed_dict + + def test_simple_hermitian_spectrum_gives_operator_with_zero_imag_part(self): + with self.test_session(): + spectrum = math_ops.cast([1., 1j, -1j], dtypes.complex64) + operator = linalg.LinearOperatorCirculant( + spectrum, input_output_dtype=dtypes.complex64) + matrix = operator.to_dense() + imag_matrix = math_ops.imag(matrix) + eps = np.finfo(np.float32).eps + np.testing.assert_allclose(0, imag_matrix.eval(), rtol=0, atol=eps * 3) + + def test_simple_positive_real_spectrum_gives_self_adjoint_pos_def_oper(self): + with self.test_session() as sess: + spectrum = math_ops.cast([6., 4, 2], dtypes.complex64) + operator = linalg.LinearOperatorCirculant( + spectrum, input_output_dtype=dtypes.complex64) + matrix, matrix_h = sess.run( + [operator.to_dense(), + linalg.adjoint(operator.to_dense())]) + self.assertAllClose(matrix, matrix_h) + operator.assert_positive_definite().run() # Should not fail + operator.assert_self_adjoint().run() # Should not fail + + 
def test_defining_operator_using_real_convolution_kernel(self): + with self.test_session(): + convolution_kernel = [1., 2., 1.] + spectrum = math_ops.fft( + math_ops.cast(convolution_kernel, dtypes.complex64)) + + # spectrum is shape [3] ==> operator is shape [3, 3] + # spectrum is Hermitian ==> operator is real. + operator = linalg.LinearOperatorCirculant(spectrum) + + # Allow for complex output so we can make sure it has zero imag part. + self.assertEqual(operator.dtype, dtypes.complex64) + + matrix = operator.to_dense().eval() + np.testing.assert_allclose(0, np.imag(matrix), atol=1e-6) + + def test_hermitian_spectrum_gives_operator_with_zero_imag_part(self): + with self.test_session(): + # Make spectrum the FFT of a real convolution kernel h. This ensures that + # spectrum is Hermitian. + h = linear_operator_test_util.random_normal(shape=(3, 4)) + spectrum = math_ops.fft(math_ops.cast(h, dtypes.complex64)) + operator = linalg.LinearOperatorCirculant( + spectrum, input_output_dtype=dtypes.complex64) + matrix = operator.to_dense() + imag_matrix = math_ops.imag(matrix) + eps = np.finfo(np.float32).eps + np.testing.assert_allclose( + 0, imag_matrix.eval(), rtol=0, atol=eps * 3 * 4) + + def test_convolution_kernel_same_as_first_row_of_to_dense(self): + spectrum = [[3., 2., 1.], [2., 1.5, 1.]] + with self.test_session(): + operator = linalg.LinearOperatorCirculant(spectrum) + h = operator.convolution_kernel() + c = operator.to_dense() + + self.assertAllEqual((2, 3), h.get_shape()) + self.assertAllEqual((2, 3, 3), c.get_shape()) + self.assertAllClose(h.eval(), c.eval()[:, :, 0]) + + def test_assert_non_singular_fails_for_singular_operator(self): + spectrum = math_ops.cast([0, 4, 2j + 2], dtypes.complex64) + operator = linalg.LinearOperatorCirculant(spectrum) + with self.test_session(): + with self.assertRaisesOpError("Singular operator"): + operator.assert_non_singular().run() + + def test_assert_non_singular_does_not_fail_for_non_singular_operator(self): + spectrum = 
math_ops.cast([-3j, 4, 2j + 2], dtypes.complex64) + operator = linalg.LinearOperatorCirculant(spectrum) + with self.test_session(): + operator.assert_non_singular().run() # Should not fail + + def test_assert_positive_definite_fails_for_non_positive_definite(self): + spectrum = math_ops.cast([6., 4, 2j], dtypes.complex64) + operator = linalg.LinearOperatorCirculant(spectrum) + with self.test_session(): + with self.assertRaisesOpError("Not positive definite"): + operator.assert_positive_definite().run() + + def test_assert_positive_definite_does_not_fail_when_pos_def(self): + spectrum = math_ops.cast([6., 4, 2j + 2], dtypes.complex64) + operator = linalg.LinearOperatorCirculant(spectrum) + with self.test_session(): + operator.assert_positive_definite().run() # Should not fail + + def test_real_spectrum_and_not_self_adjoint_hint_raises(self): + spectrum = [1., 2.] + with self.assertRaisesRegexp(ValueError, "real.*always.*self-adjoint"): + linalg.LinearOperatorCirculant(spectrum, is_self_adjoint=False) + + def test_real_spectrum_auto_sets_is_self_adjoint_to_true(self): + spectrum = [1., 2.] + operator = linalg.LinearOperatorCirculant(spectrum) + self.assertTrue(operator.is_self_adjoint) + + +class LinearOperatorCirculant2DBaseTest(object): + """Common class for 2D circulant tests.""" + + @contextlib.contextmanager + def test_session(self, *args, **kwargs): + with test.TestCase.test_session(self, *args, **kwargs) as sess: + with spectral_ops_test_util.fft_kernel_label_map(): + yield sess + + @property + def _operator_build_infos(self): + build_info = linear_operator_test_util.OperatorBuildInfo + # non-batch operators (n, n) and batch operators. 
+ return [ + build_info((0, 0)), + build_info((1, 1)), + build_info((1, 6, 6)), + build_info((3, 4, 4)), + build_info((2, 1, 3, 3)) + ] + + def _shape_to_spectrum_shape(self, shape): + """Get a spectrum shape that will make an operator of desired shape.""" + # This 2D block circulant operator takes a spectrum of shape + # batch_shape + [N0, N1], + # and creates an operator of shape + # batch_shape + [N0*N1, N0*N1] + if shape == (0, 0): + return (0, 0) + elif shape == (1, 1): + return (1, 1) + elif shape == (1, 6, 6): + return (1, 2, 3) + elif shape == (3, 4, 4): + return (3, 2, 2) + elif shape == (2, 1, 3, 3): + return (2, 1, 3, 1) + else: + raise ValueError("Unhandled shape: %s" % shape) + + def _spectrum_to_circulant_2d(self, spectrum, shape, dtype): + """Creates a block circulant matrix from a spectrum. + + Intentionally done in an explicit yet inefficient way. This provides a + cross check to the main code that uses fancy reshapes. + + Args: + spectrum: Float or complex `Tensor`. + shape: Python list. Desired shape of returned matrix. + dtype: Type to cast the returned matrix to. + + Returns: + Block circulant (batch) matrix of desired `dtype`. + """ + spectrum = _to_complex(spectrum) + spectrum_shape = self._shape_to_spectrum_shape(shape) + domain_dimension = spectrum_shape[-1] + if not domain_dimension: + return array_ops.zeros(shape, dtype) + + block_shape = spectrum_shape[-2:] + + # Explicitly compute the action of spectrum on basis vectors. + matrix_rows = [] + for n0 in range(block_shape[0]): + for n1 in range(block_shape[1]): + x = np.zeros(block_shape) + # x is a basis vector. + x[n0, n1] = 1.0 + fft_x = math_ops.fft2d(x) + h_convolve_x = math_ops.ifft2d(spectrum * fft_x) + # We want the flat version of the action of the operator on a basis + # vector, not the block version. 
+ h_convolve_x = array_ops.reshape(h_convolve_x, shape[:-1]) + matrix_rows.append(h_convolve_x) + matrix = array_ops.stack(matrix_rows, axis=-1) + return math_ops.cast(matrix, dtype) + + +class LinearOperatorCirculant2DTestHermitianSpectrum( + LinearOperatorCirculant2DBaseTest, + linear_operator_test_util.SquareLinearOperatorDerivedClassTest): + """Test of LinearOperatorCirculant2D when the spectrum is Hermitian. + + Hermitian spectrum <==> Real valued operator. We test both real and complex + dtypes here though. So in some cases the matrix will be complex but with + zero imaginary part. + """ + + @property + def _dtypes_to_test(self): + return [dtypes.float32, dtypes.complex64] + + def _operator_and_mat_and_feed_dict(self, build_info, dtype, use_placeholder): + shape = build_info.shape + # For this test class, we are creating Hermitian spectrums. + # We also want the spectrum to have eigenvalues bounded away from zero. + # + # pre_spectrum is bounded away from zero. + pre_spectrum = linear_operator_test_util.random_uniform( + shape=self._shape_to_spectrum_shape(shape), minval=1., maxval=2.) + pre_spectrum_c = _to_complex(pre_spectrum) + + # Real{IFFT[pre_spectrum]} + # = IFFT[EvenPartOf[pre_spectrum]] + # is the IFFT of something that is also bounded away from zero. + # Therefore, FFT[pre_h] would be a well-conditioned spectrum. + pre_h = math_ops.ifft2d(pre_spectrum_c) + + # A spectrum is Hermitian iff it is the DFT of a real convolution kernel. + # So we will make spectrum = FFT[h], for real valued h. + h = math_ops.real(pre_h) + h_c = _to_complex(h) + + spectrum = math_ops.fft2d(h_c) + + if use_placeholder: + spectrum_ph = array_ops.placeholder(dtypes.complex64) + # Evaluate here because (i) you cannot feed a tensor, and (ii) + # it is random and we want the same value used for both mat and feed_dict. 
+ spectrum = spectrum.eval() + operator = linalg.LinearOperatorCirculant2D( + spectrum_ph, input_output_dtype=dtype) + feed_dict = {spectrum_ph: spectrum} + else: + operator = linalg.LinearOperatorCirculant2D( + spectrum, input_output_dtype=dtype) + feed_dict = None + + mat = self._spectrum_to_circulant_2d(spectrum, shape, dtype=dtype) + + return operator, mat, feed_dict + + +class LinearOperatorCirculant2DTestNonHermitianSpectrum( + LinearOperatorCirculant2DBaseTest, + linear_operator_test_util.SquareLinearOperatorDerivedClassTest): + """Test of LinearOperatorCirculant2D when the spectrum is not Hermitian. + + Non-Hermitian spectrum <==> Complex valued operator. + We test only complex dtypes here. + """ + + @property + def _dtypes_to_test(self): + return [dtypes.complex64] + + def _operator_and_mat_and_feed_dict(self, build_info, dtype, use_placeholder): + shape = build_info.shape + # Will be well conditioned enough to get accurate solves. + spectrum = linear_operator_test_util.random_sign_uniform( + shape=self._shape_to_spectrum_shape(shape), + dtype=dtype, + minval=1., + maxval=2.) + + if use_placeholder: + spectrum_ph = array_ops.placeholder(dtypes.complex64) + # Evaluate here because (i) you cannot feed a tensor, and (ii) + # it is random and we want the same value used for both mat and feed_dict. + spectrum = spectrum.eval() + operator = linalg.LinearOperatorCirculant2D( + spectrum_ph, input_output_dtype=dtype) + feed_dict = {spectrum_ph: spectrum} + else: + operator = linalg.LinearOperatorCirculant2D( + spectrum, input_output_dtype=dtype) + feed_dict = None + + mat = self._spectrum_to_circulant_2d(spectrum, shape, dtype=dtype) + + return operator, mat, feed_dict + + def test_real_hermitian_spectrum_gives_real_symmetric_operator(self): + with self.test_session() as sess: + # This is a real and hermitian spectrum. 
+ spectrum = [[1., 2., 2.], [3., 4., 4.], [3., 4., 4.]] + operator = linalg.LinearOperatorCirculant(spectrum) + + matrix_tensor = operator.to_dense() + self.assertEqual(matrix_tensor.dtype, + linear_operator_circulant._DTYPE_COMPLEX) + matrix_t = array_ops.matrix_transpose(matrix_tensor) + imag_matrix = math_ops.imag(matrix_tensor) + matrix, matrix_transpose, imag_matrix = sess.run( + [matrix_tensor, matrix_t, imag_matrix]) + + np.testing.assert_allclose(0, imag_matrix, atol=1e-6) + self.assertAllClose(matrix, matrix_transpose, atol=0) + + def test_real_spectrum_gives_self_adjoint_operator(self): + with self.test_session() as sess: + # This is a real and hermitian spectrum. + spectrum = linear_operator_test_util.random_normal( + shape=(3, 3), dtype=dtypes.float32) + operator = linalg.LinearOperatorCirculant2D(spectrum) + + matrix_tensor = operator.to_dense() + self.assertEqual(matrix_tensor.dtype, + linear_operator_circulant._DTYPE_COMPLEX) + matrix_h = linalg.adjoint(matrix_tensor) + matrix, matrix_h = sess.run([matrix_tensor, matrix_h]) + self.assertAllClose(matrix, matrix_h, atol=0) + + def test_assert_non_singular_fails_for_singular_operator(self): + spectrum = math_ops.cast([[0, 4], [2j + 2, 3.]], dtypes.complex64) + operator = linalg.LinearOperatorCirculant2D(spectrum) + with self.test_session(): + with self.assertRaisesOpError("Singular operator"): + operator.assert_non_singular().run() + + def test_assert_non_singular_does_not_fail_for_non_singular_operator(self): + spectrum = math_ops.cast([[-3j, 4], [2j + 2, 3.]], dtypes.complex64) + operator = linalg.LinearOperatorCirculant2D(spectrum) + with self.test_session(): + operator.assert_non_singular().run() # Should not fail + + def test_assert_positive_definite_fails_for_non_positive_definite(self): + spectrum = math_ops.cast([[6., 4], [2j, 3.]], dtypes.complex64) + operator = linalg.LinearOperatorCirculant2D(spectrum) + with self.test_session(): + with self.assertRaisesOpError("Not positive definite"): + 
operator.assert_positive_definite().run() + + def test_assert_positive_definite_does_not_fail_when_pos_def(self): + spectrum = math_ops.cast([[6., 4], [2j + 2, 3.]], dtypes.complex64) + operator = linalg.LinearOperatorCirculant2D(spectrum) + with self.test_session(): + operator.assert_positive_definite().run() # Should not fail + + def test_real_spectrum_and_not_self_adjoint_hint_raises(self): + spectrum = [[1., 2.], [3., 4]] + with self.assertRaisesRegexp(ValueError, "real.*always.*self-adjoint"): + linalg.LinearOperatorCirculant2D(spectrum, is_self_adjoint=False) + + def test_real_spectrum_auto_sets_is_self_adjoint_to_true(self): + spectrum = [[1., 2.], [3., 4]] + operator = linalg.LinearOperatorCirculant2D(spectrum) + self.assertTrue(operator.is_self_adjoint) + + def test_invalid_dtype_raises(self): + spectrum = array_ops.constant(rng.rand(2, 2, 2)) + with self.assertRaisesRegexp(TypeError, "must have dtype"): + linalg.LinearOperatorCirculant2D(spectrum) + + def test_invalid_rank_raises(self): + spectrum = array_ops.constant(np.float32(rng.rand(2))) + with self.assertRaisesRegexp(ValueError, "must have at least 2 dimensions"): + linalg.LinearOperatorCirculant2D(spectrum) + + +class LinearOperatorCirculant3DTest(test.TestCase): + """Simple test of the 3D case. See also the 1D and 2D tests.""" + + @contextlib.contextmanager + def test_session(self, *args, **kwargs): + with test.TestCase.test_session(self, *args, **kwargs) as sess: + with spectral_ops_test_util.fft_kernel_label_map(): + yield sess + + def test_real_spectrum_gives_self_adjoint_operator(self): + with self.test_session() as sess: + # This is a real and hermitian spectrum. 
+ spectrum = linear_operator_test_util.random_normal( + shape=(2, 2, 3, 5), dtype=dtypes.float32) + operator = linalg.LinearOperatorCirculant3D(spectrum) + self.assertAllEqual((2, 2 * 3 * 5, 2 * 3 * 5), operator.shape) + + matrix_tensor = operator.to_dense() + self.assertEqual(matrix_tensor.dtype, + linear_operator_circulant._DTYPE_COMPLEX) + matrix_h = linalg.adjoint(matrix_tensor) + + matrix, matrix_h = sess.run([matrix_tensor, matrix_h]) + self.assertAllEqual((2, 2 * 3 * 5, 2 * 3 * 5), matrix.shape) + self.assertAllClose(matrix, matrix_h) + + def test_defining_operator_using_real_convolution_kernel(self): + with self.test_session(): + convolution_kernel = linear_operator_test_util.random_normal( + shape=(2, 2, 3, 5), dtype=dtypes.float32) + # Convolution kernel is real ==> spectrum is Hermitian. + spectrum = math_ops.fft3d( + math_ops.cast(convolution_kernel, dtypes.complex64)) + + # spectrum is Hermitian ==> operator is real. + operator = linalg.LinearOperatorCirculant3D(spectrum) + self.assertAllEqual((2, 2 * 3 * 5, 2 * 3 * 5), operator.shape) + + # Allow for complex output so we can make sure it has zero imag part. + self.assertEqual(operator.dtype, dtypes.complex64) + matrix = operator.to_dense().eval() + self.assertAllEqual((2, 2 * 3 * 5, 2 * 3 * 5), matrix.shape) + np.testing.assert_allclose(0, np.imag(matrix), atol=1e-6) + + def test_defining_spd_operator_by_taking_real_part(self): + with self.test_session() as sess: + # S is real and positive. + s = linear_operator_test_util.random_uniform( + shape=(10, 2, 3, 4), dtype=dtypes.float32, minval=1., maxval=2.) + + # Let S = S1 + S2, the Hermitian and anti-hermitian parts. + # S1 = 0.5 * (S + S^H), S2 = 0.5 * (S - S^H), + # where ^H is the Hermitian transpose of the function: + # f(n0, n1, n2)^H := ComplexConjugate[f(N0-n0, N1-n1, N2-n2)]. 
+ # We want to isolate S1, since + # S1 is Hermitian by construction + # S1 is real since S is + # S1 is positive since it is the sum of two positive kernels + + # IDFT[S] = IDFT[S1] + IDFT[S2] + # = H1 + H2 + # where H1 is real since it is Hermitian, + # and H2 is imaginary since it is anti-Hermitian. + ifft_s = math_ops.ifft3d(math_ops.cast(s, dtypes.complex64)) + + # Throw away H2, keep H1. + real_ifft_s = math_ops.real(ifft_s) + + # This is the perfect spectrum! + # spectrum = DFT[H1] + # = S1, + fft_real_ifft_s = math_ops.fft3d( + math_ops.cast(real_ifft_s, dtypes.complex64)) + + # S1 is Hermitian ==> operator is real. + # S1 is real ==> operator is self-adjoint. + # S1 is positive ==> operator is positive-definite. + operator = linalg.LinearOperatorCirculant3D(fft_real_ifft_s) + + # Allow for complex output so we can check operator has zero imag part. + self.assertEqual(operator.dtype, dtypes.complex64) + matrix, matrix_t = sess.run([ + operator.to_dense(), + array_ops.matrix_transpose(operator.to_dense()) + ]) + operator.assert_positive_definite().run() # Should not fail. + np.testing.assert_allclose(0, np.imag(matrix), atol=1e-6) + self.assertAllClose(matrix, matrix_t) + + # Just to test the theory, get S2 as well. + # This should create an imaginary operator. + # S2 is anti-Hermitian ==> operator is imaginary. + # S2 is real ==> operator is self-adjoint. 
+ imag_ifft_s = math_ops.imag(ifft_s) + fft_imag_ifft_s = math_ops.fft3d( + 1j * math_ops.cast(imag_ifft_s, dtypes.complex64)) + operator_imag = linalg.LinearOperatorCirculant3D(fft_imag_ifft_s) + + matrix, matrix_h = sess.run([ + operator_imag.to_dense(), + array_ops.matrix_transpose(math_ops.conj(operator_imag.to_dense())) + ]) + self.assertAllClose(matrix, matrix_h) + np.testing.assert_allclose(0, np.real(matrix), atol=1e-7) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/ops/linalg/linalg.py b/tensorflow/python/ops/linalg/linalg.py index 14319025ff..d73c21cdc0 100644 --- a/tensorflow/python/ops/linalg/linalg.py +++ b/tensorflow/python/ops/linalg/linalg.py @@ -22,6 +22,7 @@ from __future__ import print_function # pylint: disable=wildcard-import,unused-import from tensorflow.python.ops.linalg.linalg_impl import * from tensorflow.python.ops.linalg.linear_operator import * +from tensorflow.python.ops.linalg.linear_operator_circulant import * from tensorflow.python.ops.linalg.linear_operator_composition import * from tensorflow.python.ops.linalg.linear_operator_diag import * from tensorflow.python.ops.linalg.linear_operator_full_matrix import * diff --git a/tensorflow/python/ops/linalg/linear_operator_circulant.py b/tensorflow/python/ops/linalg/linear_operator_circulant.py new file mode 100644 index 0000000000..c367ed25ad --- /dev/null +++ b/tensorflow/python/ops/linalg/linear_operator_circulant.py @@ -0,0 +1,1074 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""`LinearOperator` coming from a [[nested] block] circulant matrix.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.distributions import util as distribution_util +from tensorflow.python.ops.linalg import linalg_impl as linalg +from tensorflow.python.ops.linalg import linear_operator +from tensorflow.python.ops.linalg import linear_operator_util +from tensorflow.python.util.tf_export import tf_export + +__all__ = [ + "LinearOperatorCirculant", + "LinearOperatorCirculant2D", + "LinearOperatorCirculant3D", +] + +# Different FFT Ops will be used for different block depths. +_FFT_OP = {1: math_ops.fft, 2: math_ops.fft2d, 3: math_ops.fft3d} +_IFFT_OP = {1: math_ops.ifft, 2: math_ops.ifft2d, 3: math_ops.ifft3d} + +# This is the only dtype allowed with fft ops. +# TODO(langmore) Add other types once available. +_DTYPE_COMPLEX = dtypes.complex64 + + +# TODO(langmore) Add transformations that create common spectrums, e.g. +# starting with the convolution kernel +# start with half a spectrum, and create a Hermitian one. +# common filters. +# TODO(langmore) Support rectangular Toeplitz matrices. +class _BaseLinearOperatorCirculant(linear_operator.LinearOperator): + """Base class for circulant operators. Not user facing. + + `LinearOperator` acting like a [batch] [[nested] block] circulant matrix. 
+ """ + + def __init__(self, + spectrum, + block_depth, + input_output_dtype=_DTYPE_COMPLEX, + is_non_singular=None, + is_self_adjoint=None, + is_positive_definite=None, + is_square=True, + name="LinearOperatorCirculant"): + r"""Initialize an `_BaseLinearOperatorCirculant`. + + Args: + spectrum: Shape `[B1,...,Bb, N]` `Tensor`. Allowed dtypes are + `float32`, `complex64`. Type can be different than `input_output_dtype` + block_depth: Python integer, either 1, 2, or 3. Will be 1 for circulant, + 2 for block circulant, and 3 for nested block circulant. + input_output_dtype: `dtype` for input/output. Must be either + `float32` or `complex64`. + is_non_singular: Expect that this operator is non-singular. + is_self_adjoint: Expect that this operator is equal to its hermitian + transpose. If `spectrum` is real, this will always be true. + is_positive_definite: Expect that this operator is positive definite, + meaning the quadratic form `x^H A x` has positive real part for all + nonzero `x`. Note that we do not require the operator to be + self-adjoint to be positive-definite. See: + https://en.wikipedia.org/wiki/Positive-definite_matrix\ + #Extension_for_non_symmetric_matrices + is_square: Expect that this operator acts like square [batch] matrices. + name: A name to prepend to all ops created by this class. + + Raises: + ValueError: If `block_depth` is not an allowed value. + TypeError: If `spectrum` is not an allowed type. + """ + + allowed_block_depths = [1, 2, 3] + + self._name = name + + if block_depth not in allowed_block_depths: + raise ValueError("Expected block_depth to be in %s. Found: %s." % + (allowed_block_depths, block_depth)) + self._block_depth = block_depth + + with ops.name_scope(name, values=[spectrum]): + self._spectrum = self._check_spectrum_and_return_tensor(spectrum) + + # Check and auto-set hints. 
+ if not self.spectrum.dtype.is_complex: + if is_self_adjoint is False: + raise ValueError( + "A real spectrum always corresponds to a self-adjoint operator.") + is_self_adjoint = True + + if is_square is False: + raise ValueError( + "A [[nested] block] circulant operator is always square.") + is_square = True + + # If spectrum.shape = [s0, s1, s2], and block_depth = 2, + # block_shape = [s1, s2] + s_shape = array_ops.shape(self.spectrum) + self._block_shape_tensor = s_shape[-self.block_depth:] + + # Add common variants of spectrum to the graph. + self._spectrum_complex = _to_complex(self.spectrum) + self._abs_spectrum = math_ops.abs(self.spectrum) + self._conj_spectrum = math_ops.conj(self._spectrum_complex) + + super(_BaseLinearOperatorCirculant, self).__init__( + dtype=dtypes.as_dtype(input_output_dtype), + graph_parents=[self.spectrum], + is_non_singular=is_non_singular, + is_self_adjoint=is_self_adjoint, + is_positive_definite=is_positive_definite, + is_square=is_square, + name=name) + + def _check_spectrum_and_return_tensor(self, spectrum): + """Static check of spectrum. Then return `Tensor` version.""" + spectrum = ops.convert_to_tensor(spectrum, name="spectrum") + + allowed_dtypes = [dtypes.float32, dtypes.complex64] + if spectrum.dtype not in allowed_dtypes: + raise TypeError("Argument spectrum must have dtype in %s. Found: %s" % + (allowed_dtypes, spectrum.dtype)) + if spectrum.get_shape().ndims is not None: + if spectrum.get_shape().ndims < self.block_depth: + raise ValueError( + "Argument spectrum must have at least %d dimensions. Found: %s" % + (self.block_depth, spectrum)) + return spectrum + + @property + def block_depth(self): + """Depth of recursively defined circulant blocks defining this `Operator`. + + With `A` the dense representation of this `Operator`, + + `block_depth = 1` means `A` is symmetric circulant. 
For example, + + ``` + A = |x y z y| + |y x y z| + |z y x y| + |y z y x| + ``` + + `block_depth = 2` means `A` is block symmetric circulant with symmetric + circulant blocks. For example, with `X`, `Y`, `Z` symmetric circulant, + + ``` + A = |X Y Z Y| + |Y X Y Z| + |Z Y X Y| + |Y Z Y X| + ``` + + `block_depth = 3` means `A` is block symmetric circulant with block + symmetric circulant blocks. + + Returns: + Python `integer`. + """ + return self._block_depth + + def block_shape_tensor(self): + """Shape of the block dimensions of `self.spectrum`.""" + return self._block_shape_tensor + + @property + def block_shape(self): + return self.spectrum.get_shape()[-self.block_depth:] + + @property + def spectrum(self): + return self._spectrum + + def _vectorize_then_blockify(self, matrix): + """Shape batch matrix to batch vector, then blockify trailing dimensions.""" + # Suppose + # matrix.shape = [m0, m1, m2, m3], + # and matrix is a matrix because the final two dimensions are matrix dims. + # self.block_depth = 2, + # self.block_shape = [b0, b1] (note b0 * b1 = m2). + # We will reshape matrix to + # [m3, m0, m1, b0, b1]. + + # Vectorize: Reshape to batch vector. + # [m0, m1, m2, m3] --> [m3, m0, m1, m2] + # This is called "vectorize" because we have taken the final two matrix dims + # and turned this into a size m3 batch of vectors. + vec = distribution_util.rotate_transpose(matrix, shift=1) + + # Blockify: Blockify trailing dimensions. + # [m3, m0, m1, m2] --> [m3, m0, m1, b0, b1] + if (vec.get_shape().is_fully_defined() and + self.block_shape.is_fully_defined()): + # vec_leading_shape = [m3, m0, m1], + # the parts of vec that will not be blockified. 
+ vec_leading_shape = vec.get_shape()[:-1] + final_shape = vec_leading_shape.concatenate(self.block_shape) + else: + vec_leading_shape = array_ops.shape(vec)[:-1] + final_shape = array_ops.concat( + (vec_leading_shape, self.block_shape_tensor()), 0) + return array_ops.reshape(vec, final_shape) + + def _unblockify_then_matricize(self, vec): + """Flatten the block dimensions then reshape to a batch matrix.""" + # Suppose + # vec.shape = [v0, v1, v2, v3], + # self.block_depth = 2. + # Then + # leading shape = [v0, v1] + # block shape = [v2, v3]. + # We will reshape vec to + # [v1, v2*v3, v0]. + + # Un-blockify: Flatten block dimensions. Reshape + # [v0, v1, v2, v3] --> [v0, v1, v2*v3]. + if vec.get_shape().is_fully_defined(): + # vec_shape = [v0, v1, v2, v3] + vec_shape = vec.get_shape().as_list() + # vec_leading_shape = [v0, v1] + vec_leading_shape = vec_shape[:-self.block_depth] + # vec_block_shape = [v2, v3] + vec_block_shape = vec_shape[-self.block_depth:] + # flat_shape = [v0, v1, v2*v3] + flat_shape = vec_leading_shape + [np.prod(vec_block_shape)] + else: + vec_shape = array_ops.shape(vec) + vec_leading_shape = vec_shape[:-self.block_depth] + vec_block_shape = vec_shape[-self.block_depth:] + flat_shape = array_ops.concat( + (vec_leading_shape, [math_ops.reduce_prod(vec_block_shape)]), 0) + vec_flat = array_ops.reshape(vec, flat_shape) + + # Matricize: Reshape to batch matrix. + # [v0, v1, v2*v3] --> [v1, v2*v3, v0], + # representing a shape [v1] batch of [v2*v3, v0] matrices. + matrix = distribution_util.rotate_transpose(vec_flat, shift=-1) + return matrix + + def _fft(self, x): + """FFT along the last self.block_depth dimensions of x. + + Args: + x: `Tensor` with floating or complex `dtype`. + Should be in the form returned by self._vectorize_then_blockify. + + Returns: + `Tensor` with `dtype` `complex64`. 
+ """ + x_complex = _to_complex(x) + return _FFT_OP[self.block_depth](x_complex) + + def _ifft(self, x): + """IFFT along the last self.block_depth dimensions of x. + + Args: + x: `Tensor` with floating or complex dtype. Should be in the form + returned by self._vectorize_then_blockify. + + Returns: + `Tensor` with `dtype` `complex64`. + """ + x_complex = _to_complex(x) + return _IFFT_OP[self.block_depth](x_complex) + + def convolution_kernel(self, name="convolution_kernel"): + """Convolution kernel corresponding to `self.spectrum`. + + The `D` dimensional DFT of this kernel is the frequency domain spectrum of + this operator. + + Args: + name: A name to give this `Op`. + + Returns: + `Tensor` with `dtype` `self.dtype`. + """ + with self._name_scope(name): + h = self._ifft(self._spectrum_complex) + return math_ops.cast(h, self.dtype) + + def _shape(self): + s_shape = self._spectrum.get_shape() + # Suppose spectrum.shape = [a, b, c, d] + # block_depth = 2 + # Then: + # batch_shape = [a, b] + # N = c*d + # and we want to return + # [a, b, c*d, c*d] + batch_shape = s_shape[:-self.block_depth] + # trailing_dims = [c, d] + trailing_dims = s_shape[-self.block_depth:] + if trailing_dims.is_fully_defined(): + n = np.prod(trailing_dims.as_list()) + else: + n = None + n_x_n = tensor_shape.TensorShape([n, n]) + return batch_shape.concatenate(n_x_n) + + def _shape_tensor(self): + # See self.shape for explanation of steps + s_shape = array_ops.shape(self._spectrum) + batch_shape = s_shape[:-self.block_depth] + trailing_dims = s_shape[-self.block_depth:] + n = math_ops.reduce_prod(trailing_dims) + n_x_n = [n, n] + return array_ops.concat((batch_shape, n_x_n), 0) + + def assert_hermitian_spectrum(self, name="assert_hermitian_spectrum"): + """Returns an `Op` that asserts this operator has Hermitian spectrum. + + This operator corresponds to a real-valued matrix if and only if its + spectrum is Hermitian. + + Args: + name: A name to give this `Op`. 
+ + Returns: + An `Op` that asserts this operator has Hermitian spectrum. + """ + eps = np.finfo(self.dtype.real_dtype.as_numpy_dtype).eps + with self._name_scope(name): + # Assume linear accumulation of error. + max_err = eps * self.domain_dimension_tensor() + imag_convolution_kernel = math_ops.imag(self.convolution_kernel()) + return check_ops.assert_less( + math_ops.abs(imag_convolution_kernel), + max_err, + message="Spectrum was not Hermitian") + + def _assert_non_singular(self): + return linear_operator_util.assert_no_entries_with_modulus_zero( + self.spectrum, + message="Singular operator: Spectrum contained zero values.") + + def _assert_positive_definite(self): + # This operator has the action Ax = F^H D F x, + # where D is the diagonal matrix with self.spectrum on the diag. Therefore, + # x^H A x = (F x)^H D (F x), + # Since F is bijective, the condition for positive definite is the same as + # for a diagonal matrix, i.e. real part of spectrum is positive. + message = ( + "Not positive definite: Real part of spectrum was not all positive.") + return check_ops.assert_positive( + math_ops.real(self.spectrum), message=message) + + def _assert_self_adjoint(self): + # Recall correspondence between symmetry and real transforms. See docstring + return linear_operator_util.assert_zero_imag_part( + self.spectrum, + message=( + "Not self-adjoint: The spectrum contained non-zero imaginary part." + )) + + def _broadcast_batch_dims(self, x, spectrum): + """Broadcast batch dims of batch matrix `x` and spectrum.""" + # spectrum.shape = batch_shape + block_shape + # First make spectrum a batch matrix with + # spectrum.shape = batch_shape + [prod(block_shape), 1] + spec_mat = array_ops.reshape( + spectrum, array_ops.concat( + (self.batch_shape_tensor(), [-1, 1]), axis=0)) + # Second, broadcast, possibly requiring an addition of array of zeros. + x, spec_mat = linear_operator_util.broadcast_matrix_batch_dims((x, + spec_mat)) + # Third, put the block shape back into spectrum. 
+ batch_shape = array_ops.shape(x)[:-2] + spectrum = array_ops.reshape( + spec_mat, + array_ops.concat((batch_shape, self.block_shape_tensor()), axis=0)) + + return x, spectrum + + def _matmul(self, x, adjoint=False, adjoint_arg=False): + x = linalg.adjoint(x) if adjoint_arg else x + # With F the matrix of a DFT, and F^{-1}, F^H the inverse and Hermitian + # transpose, one can show that F^{-1} = F^{H} is the IDFT matrix. Therefore + # matmul(x) = F^{-1} diag(spectrum) F x, + # = F^{H} diag(spectrum) F x, + # so that + # matmul(x, adjoint=True) = F^{H} diag(conj(spectrum)) F x. + spectrum = self._conj_spectrum if adjoint else self._spectrum_complex + + x, spectrum = self._broadcast_batch_dims(x, spectrum) + + x_vb = self._vectorize_then_blockify(x) + fft_x_vb = self._fft(x_vb) + block_vector_result = self._ifft(spectrum * fft_x_vb) + y = self._unblockify_then_matricize(block_vector_result) + + return math_ops.cast(y, self.dtype) + + def _determinant(self): + reduction_indices = [-(i + 1) for i in range(self.block_depth)] + det = math_ops.reduce_prod( + self.spectrum, reduction_indices=reduction_indices) + return math_ops.cast(det, self.dtype) + + def _log_abs_determinant(self): + reduction_indices = [-(i + 1) for i in range(self.block_depth)] + lad = math_ops.reduce_sum( + math_ops.log(self._abs_spectrum), reduction_indices=reduction_indices) + return math_ops.cast(lad, self.dtype) + + def _solve(self, rhs, adjoint=False, adjoint_arg=False): + rhs = linalg.adjoint(rhs) if adjoint_arg else rhs + spectrum = self._conj_spectrum if adjoint else self._spectrum_complex + + rhs, spectrum = self._broadcast_batch_dims(rhs, spectrum) + + rhs_vb = self._vectorize_then_blockify(rhs) + fft_rhs_vb = self._fft(rhs_vb) + solution_vb = self._ifft(fft_rhs_vb / spectrum) + x = self._unblockify_then_matricize(solution_vb) + return math_ops.cast(x, self.dtype) + + def _diag_part(self): + # Get ones in shape of diag, which is [B1,...,Bb, N] + # Also get the size of the diag, "N". 
+ if self.shape.is_fully_defined(): + diag_shape = self.shape[:-1] + diag_size = self.domain_dimension.value + else: + diag_shape = self.shape_tensor()[:-1] + diag_size = self.domain_dimension_tensor() + ones_diag = array_ops.ones(diag_shape, dtype=self.dtype) + + # As proved in comments in self._trace, the value on the diag is constant, + # repeated N times. This value is the trace divided by N. + + # The handling of self.shape = (0, 0) is tricky, and is the reason we choose + # to compute trace and use that to compute diag_part, rather than computing + # the value on the diagonal ("diag_value") directly. Both result in a 0/0, + # but in different places, and the current method gives the right result in + # the end. + + # Here, if self.shape = (0, 0), then self.trace() = 0., and then + # diag_value = 0. / 0. = NaN. + diag_value = self.trace() / math_ops.cast(diag_size, self.dtype) + + # If self.shape = (0, 0), then ones_diag = [] (empty tensor), and then + # the following line is NaN * [] = [], as needed. + return diag_value[..., array_ops.newaxis] * ones_diag + + def _trace(self): + # The diagonal of the [[nested] block] circulant operator is the mean of + # the spectrum. + # Proof: For the [0,...,0] element, this follows from the IDFT formula. + # Then the result follows since all diagonal elements are the same. + + # Therefore, the trace is the sum of the spectrum. + + # Get shape of diag along with the axis over which to reduce the spectrum. + # We will reduce the spectrum over all block indices. + if self.spectrum.get_shape().is_fully_defined(): + spec_rank = self.spectrum.get_shape().ndims + axis = np.arange(spec_rank - self.block_depth, spec_rank, dtype=np.int32) + else: + spec_rank = array_ops.rank(self.spectrum) + axis = math_ops.range(spec_rank - self.block_depth, spec_rank) + + # Real diag part "re_d". + # Suppose spectrum.shape = [B1,...,Bb, N1, N2] + # self.shape = [B1,...,Bb, N, N], with N1 * N2 = N. 
+ # re_d_value.shape = [B1,...,Bb] + re_d_value = math_ops.reduce_sum(math_ops.real(self.spectrum), axis=axis) + + if not self.dtype.is_complex: + return math_ops.cast(re_d_value, self.dtype) + + # Imaginary part, "im_d". + if self.is_self_adjoint: + im_d_value = 0. + else: + im_d_value = math_ops.reduce_sum(math_ops.imag(self.spectrum), axis=axis) + + return math_ops.cast(math_ops.complex(re_d_value, im_d_value), self.dtype) + + +@tf_export("linalg.LinearOperatorCirculant") +class LinearOperatorCirculant(_BaseLinearOperatorCirculant): + """`LinearOperator` acting like a circulant matrix. + + This operator acts like a circulant matrix `A` with + shape `[B1,...,Bb, N, N]` for some `b >= 0`. The first `b` indices index a + batch member. For every batch index `(i1,...,ib)`, `A[i1,...,ib, : :]` is + an `N x N` matrix. This matrix `A` is not materialized, but for + purposes of broadcasting this shape will be relevant. + + #### Description in terms of circulant matrices + + Circulant means the entries of `A` are generated by a single vector, the + convolution kernel `h`: `A_{mn} := h_{m-n mod N}`. With `h = [w, x, y, z]`, + + ``` + A = |w z y x| + |x w z y| + |y x w z| + |z y x w| + ``` + + This means that the result of matrix multiplication `v = Au` has `Lth` column + given circular convolution between `h` with the `Lth` column of `u`. + + See http://ee.stanford.edu/~gray/toeplitz.pdf + + #### Description in terms of the frequency spectrum + + There is an equivalent description in terms of the [batch] spectrum `H` and + Fourier transforms. Here we consider `A.shape = [N, N]` and ignore batch + dimensions. 
Define the discrete Fourier transform (DFT) and its inverse by + + ``` + DFT[ h[n] ] = H[k] := sum_{n = 0}^{N - 1} h_n e^{-i 2pi k n / N} + IDFT[ H[k] ] = h[n] = N^{-1} sum_{k = 0}^{N - 1} H_k e^{i 2pi k n / N} + ``` + + From these definitions, we see that + + ``` + H[0] = sum_{n = 0}^{N - 1} h_n + H[1] = "the first positive frequency" + H[N - 1] = "the first negative frequency" + ``` + + Loosely speaking, with `*` element-wise multiplication, matrix multiplication + is equal to the action of a Fourier multiplier: `A u = IDFT[ H * DFT[u] ]`. + Precisely speaking, given `[N, R]` matrix `u`, let `DFT[u]` be the `[N, R]` + matrix with `rth` column equal to the DFT of the `rth` column of `u`. + Define the `IDFT` similarly. + Matrix multiplication may be expressed columnwise: + + ```(A u)_r = IDFT[ H * (DFT[u])_r ]``` + + #### Operator properties deduced from the spectrum. + + Letting `u` be the `kth` Euclidean basis vector, and `U = IDFT[u]`. + The above formulas show that `A U = H_k * U`. We conclude that the elements + of `H` are the eigenvalues of this operator. Therefore + + * This operator is positive definite if and only if `Real{H} > 0`. + + A general property of Fourier transforms is the correspondence between + Hermitian functions and real valued transforms. + + Suppose `H.shape = [B1,...,Bb, N]`. We say that `H` is a Hermitian spectrum + if, with `%` meaning modulus division, + + ```H[..., n % N] = ComplexConjugate[ H[..., (-n) % N] ]``` + + * This operator corresponds to a real matrix if and only if `H` is Hermitian. + * This operator is self-adjoint if and only if `H` is real. + + See e.g. "Discrete-Time Signal Processing", Oppenheim and Schafer.
+ + #### Example of a self-adjoint positive definite operator + + ```python + # spectrum is real ==> operator is self-adjoint + # spectrum is positive ==> operator is positive definite + spectrum = [6., 4, 2] + + operator = LinearOperatorCirculant(spectrum) + + # IFFT[spectrum] + operator.convolution_kernel() + ==> [4 + 0j, 1 + 0.58j, 1 - 0.58j] + + operator.to_dense() + ==> [[4 + 0.0j, 1 - 0.6j, 1 + 0.6j], + [1 + 0.6j, 4 + 0.0j, 1 - 0.6j], + [1 - 0.6j, 1 + 0.6j, 4 + 0.0j]] + ``` + + #### Example of defining in terms of a real convolution kernel + + ```python + # convolution_kernel is real ==> spectrum is Hermitian. + convolution_kernel = [1., 2., 1.] + spectrum = tf.fft(tf.cast(convolution_kernel, tf.complex64)) + + # spectrum is Hermitian ==> operator is real. + # spectrum is shape [3] ==> operator is shape [3, 3] + # We force the input/output type to be real, which allows this to operate + # like a real matrix. + operator = LinearOperatorCirculant(spectrum, input_output_dtype=tf.float32) + + operator.to_dense() + ==> [[ 1, 1, 2], + [ 2, 1, 1], + [ 1, 2, 1]] + ``` + + #### Example of Hermitian spectrum + + ```python + # spectrum is shape [3] ==> operator is shape [3, 3] + # spectrum is Hermitian ==> operator is real. + spectrum = [1, 1j, -1j] + + operator = LinearOperatorCirculant(spectrum) + + operator.to_dense() + ==> [[ 0.33 + 0j, 0.91 + 0j, -0.24 + 0j], + [-0.24 + 0j, 0.33 + 0j, 0.91 + 0j], + [ 0.91 + 0j, -0.24 + 0j, 0.33 + 0j]] + ``` + + #### Example of forcing real `dtype` when spectrum is Hermitian + + ```python + # spectrum is shape [4] ==> operator is shape [4, 4] + # spectrum is real ==> operator is self-adjoint + # spectrum is Hermitian ==> operator is real + # spectrum has positive real part ==> operator is positive-definite. + spectrum = [6., 4, 2, 4] + + # Force the input dtype to be float32. + # Cast the output to float32. This is fine because the operator will be + # real due to Hermitian spectrum.
+ operator = LinearOperatorCirculant(spectrum, input_output_dtype=tf.float32) + + operator.shape + ==> [4, 4] + + operator.to_dense() + ==> [[4, 1, 0, 1], + [1, 4, 1, 0], + [0, 1, 4, 1], + [1, 0, 1, 4]] + + # convolution_kernel = tf.ifft(spectrum) + operator.convolution_kernel() + ==> [4, 1, 0, 1] + ``` + + #### Performance + + Suppose `operator` is a `LinearOperatorCirculant` of shape `[N, N]`, + and `x.shape = [N, R]`. Then + + * `operator.matmul(x)` is `O(R*N*Log[N])` + * `operator.solve(x)` is `O(R*N*Log[N])` + * `operator.determinant()` involves a size `N` `reduce_prod`. + + If instead `operator` and `x` have shape `[B1,...,Bb, N, N]` and + `[B1,...,Bb, N, R]`, every operation increases in complexity by `B1*...*Bb`. + + #### Matrix property hints + + This `LinearOperator` is initialized with boolean flags of the form `is_X`, + for `X = non_singular, self_adjoint, positive_definite, square`. + These have the following meaning: + + * If `is_X == True`, callers should expect the operator to have the + property `X`. This is a promise that should be fulfilled, but is *not* a + runtime assert. For example, finite floating point precision may result + in these promises being violated. + * If `is_X == False`, callers should expect the operator to not have `X`. + * If `is_X == None` (the default), callers should have no expectation either + way. + """ + + def __init__(self, + spectrum, + input_output_dtype=_DTYPE_COMPLEX, + is_non_singular=None, + is_self_adjoint=None, + is_positive_definite=None, + is_square=True, + name="LinearOperatorCirculant"): + r"""Initialize an `LinearOperatorCirculant`. + + This `LinearOperator` is initialized to have shape `[B1,...,Bb, N, N]` + by providing `spectrum`, a `[B1,...,Bb, N]` `Tensor`. + + If `input_output_dtype = DTYPE`: + + * Arguments to methods such as `matmul` or `solve` must be `DTYPE`. + * Values returned by all methods, such as `matmul` or `determinant` will be + cast to `DTYPE`. 
+ + Note that if the spectrum is not Hermitian, then this operator corresponds + to a complex matrix with non-zero imaginary part. In this case, setting + `input_output_dtype` to a real type will forcibly cast the output to be + real, resulting in incorrect results! + + If on the other hand the spectrum is Hermitian, then this operator + corresponds to a real-valued matrix, and setting `input_output_dtype` to + a real type is fine. + + Args: + spectrum: Shape `[B1,...,Bb, N]` `Tensor`. Allowed dtypes are + `float32`, `complex64`. Type can be different than `input_output_dtype` + input_output_dtype: `dtype` for input/output. Must be either + `float32` or `complex64`. + is_non_singular: Expect that this operator is non-singular. + is_self_adjoint: Expect that this operator is equal to its hermitian + transpose. If `spectrum` is real, this will always be true. + is_positive_definite: Expect that this operator is positive definite, + meaning the quadratic form `x^H A x` has positive real part for all + nonzero `x`. Note that we do not require the operator to be + self-adjoint to be positive-definite. See: + https://en.wikipedia.org/wiki/Positive-definite_matrix\ + #Extension_for_non_symmetric_matrices + is_square: Expect that this operator acts like square [batch] matrices. + name: A name to prepend to all ops created by this class. + """ + super(LinearOperatorCirculant, self).__init__( + spectrum, + block_depth=1, + input_output_dtype=input_output_dtype, + is_non_singular=is_non_singular, + is_self_adjoint=is_self_adjoint, + is_positive_definite=is_positive_definite, + is_square=is_square, + name=name) + + +@tf_export("linalg.LinearOperatorCirculant2D") +class LinearOperatorCirculant2D(_BaseLinearOperatorCirculant): + """`LinearOperator` acting like a block circulant matrix. + + This operator acts like a block circulant matrix `A` with + shape `[B1,...,Bb, N, N]` for some `b >= 0`. The first `b` indices index a + batch member. 
For every batch index `(i1,...,ib)`, `A[i1,...,ib, : :]` is + an `N x N` matrix. This matrix `A` is not materialized, but for + purposes of broadcasting this shape will be relevant. + + #### Description in terms of block circulant matrices + + If `A` is block circulant, with block sizes `N0, N1` (`N0 * N1 = N`): + `A` has a block circulant structure, composed of `N0 x N0` blocks, with each + block an `N1 x N1` circulant matrix. + + For example, with `W`, `X`, `Y`, `Z` each circulant, + + ``` + A = |W Z Y X| + |X W Z Y| + |Y X W Z| + |Z Y X W| + ``` + + Note that `A` itself will not in general be circulant. + + #### Description in terms of the frequency spectrum + + There is an equivalent description in terms of the [batch] spectrum `H` and + Fourier transforms. Here we consider `A.shape = [N, N]` and ignore batch + dimensions. + + If `H.shape = [N0, N1]`, (`N0 * N1 = N`): + Loosely speaking, matrix multiplication is equal to the action of a + Fourier multiplier: `A u = IDFT2[ H DFT2[u] ]`. + Precisely speaking, given `[N, R]` matrix `u`, let `DFT2[u]` be the + `[N0, N1, R]` `Tensor` defined by re-shaping `u` to `[N0, N1, R]` and taking + a two dimensional DFT across the first two dimensions. Let `IDFT2` be the + inverse of `DFT2`. Matrix multiplication may be expressed columnwise: + + ```(A u)_r = IDFT2[ H * (DFT2[u])_r ]``` + + #### Operator properties deduced from the spectrum. + + * This operator is positive definite if and only if `Real{H} > 0`. + + A general property of Fourier transforms is the correspondence between + Hermitian functions and real valued transforms. + + Suppose `H.shape = [B1,...,Bb, N0, N1]`, we say that `H` is a Hermitian + spectrum if, with `%` indicating modulus division, + + ``` + H[..., n0 % N0, n1 % N1] = ComplexConjugate[ H[..., (-n0) % N0, (-n1) % N1 ]. + ``` + + * This operator corresponds to a real matrix if and only if `H` is Hermitian. + * This operator is self-adjoint if and only if `H` is real. + + See e.g. 
"Discrete-Time Signal Processing", Oppenheim and Schafer. + + ### Example of a self-adjoint positive definite operator + + ```python + # spectrum is real ==> operator is self-adjoint + # spectrum is positive ==> operator is positive definite + spectrum = [[1., 2., 3.], + [4., 5., 6.], + [7., 8., 9.]] + + operator = LinearOperatorCirculant2D(spectrum) + + # IFFT[spectrum] + operator.convolution_kernel() + ==> [[5.0+0.0j, -0.5-.3j, -0.5+.3j], + [-1.5-.9j, 0, 0], + [-1.5+.9j, 0, 0]] + + operator.to_dense() + ==> Complex self adjoint 9 x 9 matrix. + ``` + + #### Example of defining in terms of a real convolution kernel, + + ```python + # convolution_kernel is real ==> spectrum is Hermitian. + convolution_kernel = [[1., 2., 1.], [5., -1., 1.]] + spectrum = tf.fft2d(tf.cast(convolution_kernel, tf.complex64)) + + # spectrum is shape [2, 3] ==> operator is shape [6, 6] + # spectrum is Hermitian ==> operator is real. + operator = LinearOperatorCirculant2D(spectrum, input_output_dtype=tf.float32) + ``` + + #### Performance + + Suppose `operator` is a `LinearOperatorCirculant` of shape `[N, N]`, + and `x.shape = [N, R]`. Then + + * `operator.matmul(x)` is `O(R*N*Log[N])` + * `operator.solve(x)` is `O(R*N*Log[N])` + * `operator.determinant()` involves a size `N` `reduce_prod`. + + If instead `operator` and `x` have shape `[B1,...,Bb, N, N]` and + `[B1,...,Bb, N, R]`, every operation increases in complexity by `B1*...*Bb`. + + #### Matrix property hints + + This `LinearOperator` is initialized with boolean flags of the form `is_X`, + for `X = non_singular, self_adjoint, positive_definite, square`. + These have the following meaning + * If `is_X == True`, callers should expect the operator to have the + property `X`. This is a promise that should be fulfilled, but is *not* a + runtime assert. For example, finite floating point precision may result + in these promises being violated. + * If `is_X == False`, callers should expect the operator to not have `X`. 
+ * If `is_X == None` (the default), callers should have no expectation either + way. + """ + + def __init__(self, + spectrum, + input_output_dtype=_DTYPE_COMPLEX, + is_non_singular=None, + is_self_adjoint=None, + is_positive_definite=None, + is_square=True, + name="LinearOperatorCirculant2D"): + r"""Initialize a `LinearOperatorCirculant2D`. + + This `LinearOperator` is initialized to have shape `[B1,...,Bb, N, N]` + by providing `spectrum`, a `[B1,...,Bb, N0, N1]` `Tensor` with `N0*N1 = N`. + + If `input_output_dtype = DTYPE`: + + * Arguments to methods such as `matmul` or `solve` must be `DTYPE`. + * Values returned by all methods, such as `matmul` or `determinant` will be + cast to `DTYPE`. + + Note that if the spectrum is not Hermitian, then this operator corresponds + to a complex matrix with non-zero imaginary part. In this case, setting + `input_output_dtype` to a real type will forcibly cast the output to be + real, resulting in incorrect results! + + If on the other hand the spectrum is Hermitian, then this operator + corresponds to a real-valued matrix, and setting `input_output_dtype` to + a real type is fine. + + Args: + spectrum: Shape `[B1,...,Bb, N0, N1]` `Tensor`. Allowed dtypes are + `float32`, `complex64`. Type can be different than `input_output_dtype` + input_output_dtype: `dtype` for input/output. Must be either + `float32` or `complex64`. + is_non_singular: Expect that this operator is non-singular. + is_self_adjoint: Expect that this operator is equal to its hermitian + transpose. If `spectrum` is real, this will always be true. + is_positive_definite: Expect that this operator is positive definite, + meaning the quadratic form `x^H A x` has positive real part for all + nonzero `x`. Note that we do not require the operator to be + self-adjoint to be positive-definite. See: + https://en.wikipedia.org/wiki/Positive-definite_matrix\ + #Extension_for_non_symmetric_matrices + is_square: Expect that this operator acts like square [batch] matrices.
+ name: A name to prepend to all ops created by this class. + """ + super(LinearOperatorCirculant2D, self).__init__( + spectrum, + block_depth=2, + input_output_dtype=input_output_dtype, + is_non_singular=is_non_singular, + is_self_adjoint=is_self_adjoint, + is_positive_definite=is_positive_definite, + is_square=is_square, + name=name) + + +@tf_export("linalg.LinearOperatorCirculant3D") +class LinearOperatorCirculant3D(_BaseLinearOperatorCirculant): + """`LinearOperator` acting like a nested block circulant matrix. + + This operator acts like a block circulant matrix `A` with + shape `[B1,...,Bb, N, N]` for some `b >= 0`. The first `b` indices index a + batch member. For every batch index `(i1,...,ib)`, `A[i1,...,ib, : :]` is + an `N x N` matrix. This matrix `A` is not materialized, but for + purposes of broadcasting this shape will be relevant. + + #### Description in terms of block circulant matrices + + If `A` is nested block circulant, with block sizes `N0, N1, N2` + (`N0 * N1 * N2 = N`): + `A` has a block structure, composed of `N0 x N0` blocks, with each + block an `N1 x N1` block circulant matrix. + + For example, with `W`, `X`, `Y`, `Z` each block circulant, + + ``` + A = |W Z Y X| + |X W Z Y| + |Y X W Z| + |Z Y X W| + ``` + + Note that `A` itself will not in general be circulant. + + #### Description in terms of the frequency spectrum + + There is an equivalent description in terms of the [batch] spectrum `H` and + Fourier transforms. Here we consider `A.shape = [N, N]` and ignore batch + dimensions. + + If `H.shape = [N0, N1, N2]`, (`N0 * N1 * N2 = N`): + Loosely speaking, matrix multiplication is equal to the action of a + Fourier multiplier: `A u = IDFT3[ H DFT3[u] ]`. + Precisely speaking, given `[N, R]` matrix `u`, let `DFT3[u]` be the + `[N0, N1, N2, R]` `Tensor` defined by re-shaping `u` to `[N0, N1, N2, R]` and + taking a three dimensional DFT across the first three dimensions. Let `IDFT3` + be the inverse of `DFT3`. 
Matrix multiplication may be expressed columnwise: + + ```(A u)_r = IDFT3[ H * (DFT3[u])_r ]``` + + #### Operator properties deduced from the spectrum. + + * This operator is positive definite if and only if `Real{H} > 0`. + + A general property of Fourier transforms is the correspondence between + Hermitian functions and real valued transforms. + + Suppose `H.shape = [B1,...,Bb, N0, N1, N2]`, we say that `H` is a Hermitian + spectrum if, with `%` meaning modulus division, + + ``` + H[..., n0 % N0, n1 % N1, n2 % N2] + = ComplexConjugate[ H[..., (-n0) % N0, (-n1) % N1, (-n2) % N2] ]. + ``` + + * This operator corresponds to a real matrix if and only if `H` is Hermitian. + * This operator is self-adjoint if and only if `H` is real. + + See e.g. "Discrete-Time Signal Processing", Oppenheim and Schafer. + + ### Examples + + See `LinearOperatorCirculant` and `LinearOperatorCirculant2D` for examples. + + #### Performance + + Suppose `operator` is a `LinearOperatorCirculant` of shape `[N, N]`, + and `x.shape = [N, R]`. Then + + * `operator.matmul(x)` is `O(R*N*Log[N])` + * `operator.solve(x)` is `O(R*N*Log[N])` + * `operator.determinant()` involves a size `N` `reduce_prod`. + + If instead `operator` and `x` have shape `[B1,...,Bb, N, N]` and + `[B1,...,Bb, N, R]`, every operation increases in complexity by `B1*...*Bb`. + + #### Matrix property hints + + This `LinearOperator` is initialized with boolean flags of the form `is_X`, + for `X = non_singular, self_adjoint, positive_definite, square`. + These have the following meaning + * If `is_X == True`, callers should expect the operator to have the + property `X`. This is a promise that should be fulfilled, but is *not* a + runtime assert. For example, finite floating point precision may result + in these promises being violated. + * If `is_X == False`, callers should expect the operator to not have `X`. + * If `is_X == None` (the default), callers should have no expectation either + way. 
+ """ + + def __init__(self, + spectrum, + input_output_dtype=_DTYPE_COMPLEX, + is_non_singular=None, + is_self_adjoint=None, + is_positive_definite=None, + is_square=True, + name="LinearOperatorCirculant3D"): + """Initialize a `LinearOperatorCirculant3D`. + + This `LinearOperator` is initialized to have shape `[B1,...,Bb, N, N]` + by providing `spectrum`, a `[B1,...,Bb, N0, N1, N2]` `Tensor` + with `N0*N1*N2 = N`. + + If `input_output_dtype = DTYPE`: + + * Arguments to methods such as `matmul` or `solve` must be `DTYPE`. + * Values returned by all methods, such as `matmul` or `determinant` will be + cast to `DTYPE`. + + Note that if the spectrum is not Hermitian, then this operator corresponds + to a complex matrix with non-zero imaginary part. In this case, setting + `input_output_dtype` to a real type will forcibly cast the output to be + real, resulting in incorrect results! + + If on the other hand the spectrum is Hermitian, then this operator + corresponds to a real-valued matrix, and setting `input_output_dtype` to + a real type is fine. + + Args: + spectrum: Shape `[B1,...,Bb, N0, N1, N2]` `Tensor`. Allowed dtypes are + `float32`, `complex64`. Type can be different than `input_output_dtype` + input_output_dtype: `dtype` for input/output. Must be either + `float32` or `complex64`. + is_non_singular: Expect that this operator is non-singular. + is_self_adjoint: Expect that this operator is equal to its hermitian + transpose. If `spectrum` is real, this will always be true. + is_positive_definite: Expect that this operator is positive definite, + meaning the real part of all eigenvalues is positive. We do not require + the operator to be self-adjoint to be positive-definite. See: + https://en.wikipedia.org/wiki/Positive-definite_matrix + #Extension_for_non_symmetric_matrices + is_square: Expect that this operator acts like square [batch] matrices. + name: A name to prepend to all ops created by this class.
+ """ + super(LinearOperatorCirculant3D, self).__init__( + spectrum, + block_depth=3, + input_output_dtype=input_output_dtype, + is_non_singular=is_non_singular, + is_self_adjoint=is_self_adjoint, + is_positive_definite=is_positive_definite, + is_square=is_square, + name=name) + + +def _to_complex(x): + return math_ops.cast(x, _DTYPE_COMPLEX) diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant.__metaclass__.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant.__metaclass__.pbtxt new file mode 100644 index 0000000000..3b33f3da97 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperatorCirculant.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant.pbtxt new file mode 100644 index 0000000000..de917706d5 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant.pbtxt @@ -0,0 +1,155 @@ +path: "tensorflow.linalg.LinearOperatorCirculant" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "batch_shape" + mtype: "" + } + member { + name: "block_depth" + mtype: "" + } + member { + name: "block_shape" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + 
mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "range_dimension" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "spectrum" + mtype: "" + } + member { + name: "tensor_rank" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'spectrum\', \'input_output_dtype\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', \'name\'], varargs=None, keywords=None, defaults=[\"\", \'None\', \'None\', \'None\', \'True\', \'LinearOperatorCirculant\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_hermitian_spectrum" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_hermitian_spectrum\'], " + } + member_method { + name: "assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: "assert_self_adjoint" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "block_shape_tensor" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "convolution_kernel" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'convolution_kernel\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: 
"args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: "matmul" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } + member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'tensor_rank_tensor\'], " + } + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + member_method { + name: "trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant2-d.__metaclass__.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant2-d.__metaclass__.pbtxt new file mode 100644 index 0000000000..591bc9631a --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant2-d.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperatorCirculant2D.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant2-d.pbtxt new file mode 100644 index 0000000000..c4e6a21c3a --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant2-d.pbtxt @@ -0,0 +1,155 @@ +path: "tensorflow.linalg.LinearOperatorCirculant2D" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "batch_shape" + mtype: "" + } + member { + name: "block_depth" + mtype: "" + } + member { + name: "block_shape" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "range_dimension" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "spectrum" + mtype: "" + } + member { + name: "tensor_rank" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'spectrum\', \'input_output_dtype\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', 
\'name\'], varargs=None, keywords=None, defaults=[\"\", \'None\', \'None\', \'None\', \'True\', \'LinearOperatorCirculant2D\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_hermitian_spectrum" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_hermitian_spectrum\'], " + } + member_method { + name: "assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: "assert_self_adjoint" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "block_shape_tensor" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "convolution_kernel" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'convolution_kernel\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: "matmul" 
+ argspec: "args=[\'self\', \'x\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } + member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'tensor_rank_tensor\'], " + } + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + member_method { + name: "trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant3-d.__metaclass__.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant3-d.__metaclass__.pbtxt new file mode 100644 index 0000000000..d643139a53 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant3-d.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperatorCirculant3D.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: 
"register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant3-d.pbtxt new file mode 100644 index 0000000000..2e085a8e28 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-circulant3-d.pbtxt @@ -0,0 +1,155 @@ +path: "tensorflow.linalg.LinearOperatorCirculant3D" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "batch_shape" + mtype: "" + } + member { + name: "block_depth" + mtype: "" + } + member { + name: "block_shape" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "range_dimension" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "spectrum" + mtype: "" + } + member { + name: "tensor_rank" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'spectrum\', \'input_output_dtype\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', \'name\'], varargs=None, keywords=None, defaults=[\"\", \'None\', \'None\', \'None\', \'True\', \'LinearOperatorCirculant3D\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_hermitian_spectrum" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_hermitian_spectrum\'], " + } + member_method { + name: 
"assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: "assert_self_adjoint" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "block_shape_tensor" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "convolution_kernel" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'convolution_kernel\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: "matmul" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } 
+ member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'tensor_rank_tensor\'], " + } + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + member_method { + name: "trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt index 1d9c0c0f6d..7a5c533872 100644 --- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt @@ -4,6 +4,18 @@ tf_module { name: "LinearOperator" mtype: "" } + member { + name: "LinearOperatorCirculant" + mtype: "" + } + member { + name: "LinearOperatorCirculant2D" + mtype: "" + } + member { + name: "LinearOperatorCirculant3D" + mtype: "" + } member { name: "LinearOperatorComposition" mtype: "" -- GitLab From b9e12bc69df65eca279a90045d045e661fdb8108 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 24 Apr 2018 06:24:43 -0700 Subject: [PATCH 198/434] Make tf.contrib.framework.zero_initializer work with ResourceVariable PiperOrigin-RevId: 194077027 --- tensorflow/contrib/framework/BUILD | 1 + .../framework/kernels/zero_initializer_op.cc | 71 +++++++++++++++++++ .../contrib/framework/ops/variable_ops.cc | 29 ++++++++ .../contrib/framework/python/ops/variables.py | 8 ++- .../framework/python/ops/variables_test.py | 26 +++++++ 5 files changed, 134 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index b1c8ad49ea..f675cc0cf0 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -93,6 +93,7 @@ tf_kernel_library( ], deps = [ "//tensorflow/core:framework", + "//tensorflow/core:framework_headers_lib", "//third_party/eigen3", ], alwayslink = 1, diff --git a/tensorflow/contrib/framework/kernels/zero_initializer_op.cc b/tensorflow/contrib/framework/kernels/zero_initializer_op.cc index 5bf6b67529..6ab3f460b3 100644 --- a/tensorflow/contrib/framework/kernels/zero_initializer_op.cc +++ b/tensorflow/contrib/framework/kernels/zero_initializer_op.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/resource_var.h" namespace tensorflow { @@ -85,4 +86,74 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); #undef REGISTER_KERNELS +template +class ZeroVarInitializer : public OpKernel { + public: + explicit ZeroVarInitializer(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); + } + + void Compute(OpKernelContext* ctx) override { + Var* variable = nullptr; + OP_REQUIRES_OK(ctx, LookupOrCreateResource( + ctx, HandleFromInput(ctx, 0), &variable, + [this, ctx](Var** var_ptr) { + *var_ptr = new Var(dtype_); + PersistentTensor unused; + Tensor* var_tensor = nullptr; + AllocatorAttributes attr; + attr.set_gpu_compatible(true); + attr.set_nic_compatible(true); + TF_RETURN_IF_ERROR(ctx->allocate_persistent( + dtype_, shape_, &unused, &var_tensor, attr)); + + functor::TensorSetZero()( + ctx->eigen_device(), + var_tensor->flat()); + + *(*var_ptr)->tensor() = *var_tensor; + + return Status::OK(); + })); + + core::ScopedUnref scoped(variable); + mutex_lock ml(*variable->mu()); + + OP_REQUIRES(ctx, !variable->is_initialized, + errors::InvalidArgument("input is already initialized")); + + variable->is_initialized = true; + + Tensor* output = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &output)); + output->scalar()() = HandleFromInput(ctx, 0); + } + + private: + DataType dtype_; + TensorShape shape_; +}; + +#define REGISTER_CPU_KERNELS(type) \ + REGISTER_KERNEL_BUILDER(Name("ZeroVarInitializer") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("dtype"), \ + ZeroVarInitializer); + +TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS); +#undef REGISTER_CPU_KERNELS + +#if GOOGLE_CUDA +#define REGISTER_GPU_KERNELS(type) \ + REGISTER_KERNEL_BUILDER(Name("ZeroVarInitializer") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("dtype") \ 
+ .HostMemory("var"), \ + ZeroVarInitializer); + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); +#undef REGISTER_GPU_KERNELS +#endif // GOOGLE_CUDA + } // namespace tensorflow diff --git a/tensorflow/contrib/framework/ops/variable_ops.cc b/tensorflow/contrib/framework/ops/variable_ops.cc index 706134ba9a..f6ee6cdb57 100644 --- a/tensorflow/contrib/framework/ops/variable_ops.cc +++ b/tensorflow/contrib/framework/ops/variable_ops.cc @@ -39,4 +39,33 @@ ref: Should be from a `Variable` node. output_ref:= Same as "ref". )doc"); +REGISTER_OP("ZeroVarInitializer") + .Input("var: resource") + .Output("output_var: resource") + .Attr("dtype: type") + .Attr("shape: shape") + .SetAllowsUninitializedInput() + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->Scalar()); + DataType t; + TF_RETURN_IF_ERROR(c->GetAttr("dtype", &t)); + PartialTensorShape p; + TF_RETURN_IF_ERROR(c->GetAttr("shape", &p)); + shape_inference::ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(p, &s)); + c->set_output_handle_shapes_and_types( + 0, std::vector{{s, t}}); + + return Status::OK(); + }) + .Doc(R"doc( +Initialize 'var' with all zeros. This op requires that the resource var is not +initialized. The var will first be allocated memory, then be filled with all +zeros. This op is intended to save memory during initialization, +if you use this op, you should not run initializer of the var. + +var: Should be a ResourceVariable. +output_var:= Same as "var". 
+)doc"); + } // namespace tensorflow diff --git a/tensorflow/contrib/framework/python/ops/variables.py b/tensorflow/contrib/framework/python/ops/variables.py index 0754c3e0e3..40ae01bfcc 100644 --- a/tensorflow/contrib/framework/python/ops/variables.py +++ b/tensorflow/contrib/framework/python/ops/variables.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import resource_loader from tensorflow.python.platform import tf_logging as logging @@ -82,7 +83,12 @@ def zero_initializer(ref, use_locking=True, name="zero_initializer"): """ loader.load_op_library( resource_loader.get_path_to_datafile("_variable_ops.so")) - return gen_variable_ops.zero_initializer(ref, name=name) + if resource_variable_ops.is_resource_variable(ref): + return gen_variable_ops.zero_var_initializer( + ref.handle, shape=ref.shape, dtype=ref.dtype, name=name) + else: + return gen_variable_ops.zero_initializer(ref, name=name) + @deprecated(None, "Please switch to tf.train.assert_global_step") def assert_global_step(global_step_tensor): diff --git a/tensorflow/contrib/framework/python/ops/variables_test.py b/tensorflow/contrib/framework/python/ops/variables_test.py index 2f06df93ac..37ea6eb12a 100644 --- a/tensorflow/contrib/framework/python/ops/variables_test.py +++ b/tensorflow/contrib/framework/python/ops/variables_test.py @@ -1284,6 +1284,32 @@ class ZeroInitializerOpTest(test.TestCase): [10, 20], dtype=dtype), use_init) +class ZeroVarInitializerOpTest(test.TestCase): + + def _testZeroVarInitializer(self, shape, initializer, use_init): + var = resource_variable_ops.ResourceVariable(initializer) + var_zero = variables_lib2.zero_initializer(var) + + with self.test_session() as sess: + with 
self.assertRaisesOpError('Error while reading resource variable'): + var.eval() + if use_init: + sess.run(var.initializer) + with self.assertRaisesOpError('input is already initialized'): + var_zero.eval() + self.assertAllClose(np.ones(shape), var.eval()) + else: + var_zero.eval() + self.assertAllClose(np.zeros(shape), var.eval()) + + def testZeroVarInitializer(self): + for dtype in (dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64): + for use_init in (False, True): + self._testZeroVarInitializer([10, 20], + array_ops.ones([10, 20], dtype=dtype), + use_init) + + class FilterVariablesTest(test.TestCase): def setUp(self): -- GitLab From 5eb233d0686636a7bacc5b8813c079b6b9aa483c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 07:06:27 -0700 Subject: [PATCH 199/434] Introduce a new HLO shape and sharding matcher. These new matchers can be used in tests in combination with the existing HLO opcode matchers to better verify a generated HLO graph. PiperOrigin-RevId: 194082100 --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_matchers.cc | 63 +++++++++++++++++ .../compiler/xla/service/hlo_matchers.h | 69 +++++++++++++++++++ .../compiler/xla/service/hlo_matchers_test.cc | 58 ++++++++++++++++ 4 files changed, 191 insertions(+) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index afb344e5ae..5edb9440c0 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -359,6 +359,7 @@ cc_library( ":hlo", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/hlo_matchers.cc b/tensorflow/compiler/xla/service/hlo_matchers.cc index bc74c4bc10..69deac263e 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.cc +++ b/tensorflow/compiler/xla/service/hlo_matchers.cc @@ -132,6 +132,69 @@ bool HloCustomCallMatcher::MatchAndExplain( 
return result; } +bool HloShapeMatcher::MatchAndExplain( + const HloInstruction* instruction, + ::testing::MatchResultListener* listener) const { + if (ShapeUtil::Compatible(instruction->shape(), shape_)) { + return true; + } + *listener << instruction->ToString() << " has incorrect shape (expected: " + << ShapeUtil::HumanString(shape_) << ")"; + return false; +} + +void HloShapeMatcher::DescribeTo(std::ostream* os) const { + *os << ShapeUtil::HumanString(shape_); +} + +bool HloShapeAndLayoutMatcher::MatchAndExplain( + const HloInstruction* instruction, + ::testing::MatchResultListener* listener) const { + if (ShapeUtil::Equal(instruction->shape(), shape_)) { + return true; + } + *listener << instruction->ToString() << " has incorrect shape (expected: " + << ShapeUtil::HumanStringWithLayout(shape_) << ")"; + return false; +} + +void HloShapeAndLayoutMatcher::DescribeTo(std::ostream* os) const { + *os << ShapeUtil::HumanStringWithLayout(shape_); +} + +bool HloShardingMatcher::MatchAndExplain( + const HloInstruction* instruction, + ::testing::MatchResultListener* listener) const { + if (!sharding_.has_value()) { + if (!instruction->has_sharding()) { + return true; + } + *listener << instruction->ToString() << " expected to have no sharding."; + return false; + } + if (instruction->has_sharding()) { + if (instruction->sharding() == sharding_.value()) { + return true; + } + *listener << instruction->ToString() + << " has incorrect sharding (expected: " << sharding_->ToString() + << ")"; + return false; + } else { + *listener << instruction->ToString() + << " has no sharding (expected: " << sharding_->ToString() << ")"; + return false; + } +} + +void HloShardingMatcher::DescribeTo(std::ostream* os) const { + if (sharding_.has_value()) { + *os << sharding_->ToString(); + } else { + *os << ""; + } +} + } // namespace testing void PrintTo(const HloInstruction* inst, ::std::ostream* os) { diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h 
b/tensorflow/compiler/xla/service/hlo_matchers.h index 103f04a2cb..f2ab9b5d9b 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/test.h" +#include "tensorflow/core/lib/gtl/optional.h" namespace xla { namespace testing { @@ -86,6 +87,50 @@ class HloCustomCallMatcher : public HloMatcher { ::testing::Matcher call_target_matcher_; }; +class HloShapeMatcher + : public ::testing::MatcherInterface { + public: + explicit HloShapeMatcher(const Shape& shape) : shape_(shape) {} + + bool MatchAndExplain(const HloInstruction* instruction, + ::testing::MatchResultListener* listener) const override; + void DescribeTo(std::ostream* os) const override; + + private: + Shape shape_; +}; + +class HloShapeAndLayoutMatcher + : public ::testing::MatcherInterface { + public: + explicit HloShapeAndLayoutMatcher(const Shape& shape) : shape_(shape) {} + + bool MatchAndExplain(const HloInstruction* instruction, + ::testing::MatchResultListener* listener) const override; + void DescribeTo(std::ostream* os) const override; + + private: + Shape shape_; +}; + +// Verify the sharding of an instruction against the provided HloSharding. If a +// nullopt is provided for the expected sharding then it checks that no sharding +// is present for an instruction. +class HloShardingMatcher + : public ::testing::MatcherInterface { + public: + explicit HloShardingMatcher( + const tensorflow::gtl::optional& sharding) + : sharding_(sharding) {} + + bool MatchAndExplain(const HloInstruction* instruction, + ::testing::MatchResultListener* listener) const override; + void DescribeTo(std::ostream* os) const override; + + private: + tensorflow::gtl::optional sharding_; +}; + // HloInstruction* matchers for opcode and operands. 
Example: // namespace op = xla::opcode_matchers; // EXPECT_THAT(instruction, @@ -231,6 +276,30 @@ inline ::testing::Matcher CustomCall() { new ::xla::testing::HloMatcher(HloOpcode::kCustomCall, {})); } +// Verifies the shape or the shape and the layout of an HLO instruction against +// the provided shape object. +inline ::testing::Matcher Shape( + const class Shape& shape) { + return ::testing::MakeMatcher(new ::xla::testing::HloShapeMatcher(shape)); +} +inline ::testing::Matcher ShapeWithLayout( + const class Shape& shape) { + return ::testing::MakeMatcher( + new ::xla::testing::HloShapeAndLayoutMatcher(shape)); +} + +// Verifies the value of the HloSharing against the provided sharding object. +inline ::testing::Matcher Sharding( + const HloSharding& sharding) { + return ::testing::MakeMatcher( + new ::xla::testing::HloShardingMatcher(sharding)); +} +// Verifies that no HloSharding is set for an HLO instruction. +inline ::testing::Matcher NoSharding() { + return ::testing::MakeMatcher( + new ::xla::testing::HloShardingMatcher(tensorflow::gtl::nullopt)); +} + #undef HLO_MATCHER } // namespace opcode_matchers diff --git a/tensorflow/compiler/xla/service/hlo_matchers_test.cc b/tensorflow/compiler/xla/service/hlo_matchers_test.cc index 1c21703a45..c6373b2e46 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers_test.cc +++ b/tensorflow/compiler/xla/service/hlo_matchers_test.cc @@ -100,5 +100,63 @@ TEST(HloMatchersTest, CustomCallMatcher) { R"(custom-call with call target that is equal to "foo_target")"); } +TEST(HloMatchersTest, ShapeMatcher) { + auto p0 = HloInstruction::CreateParameter( + 0, ShapeUtil::MakeShapeWithLayout(F32, {5, 7}, {0, 1}), "param"); + + EXPECT_THAT(p0.get(), op::Shape(ShapeUtil::MakeShape(F32, {5, 7}))); + EXPECT_THAT( + p0.get(), + ::testing::Not(op::ShapeWithLayout(ShapeUtil::MakeShape(F32, {5, 7})))); + EXPECT_THAT(p0.get(), + ::testing::Not(op::Shape(ShapeUtil::MakeShape(F32, {7, 5})))); + EXPECT_THAT( + p0.get(), + 
::testing::Not(op::ShapeWithLayout(ShapeUtil::MakeShape(F32, {7, 5})))); + EXPECT_THAT(p0.get(), + op::Shape(ShapeUtil::MakeShapeWithLayout(F32, {5, 7}, {0, 1}))); + EXPECT_THAT(p0.get(), op::ShapeWithLayout(ShapeUtil::MakeShapeWithLayout( + F32, {5, 7}, {0, 1}))); + EXPECT_THAT(p0.get(), + ::testing::Not(op::ShapeWithLayout( + ShapeUtil::MakeShapeWithLayout(F32, {5, 7}, {1, 0})))); + + EXPECT_THAT(Explain(p0.get(), op::Shape(ShapeUtil::MakeShape(F32, {7, 5}))), + "%param = f32[5,7]{0,1} parameter(0) has incorrect shape " + "(expected: f32[7,5])"); + EXPECT_THAT( + Explain(p0.get(), op::ShapeWithLayout(ShapeUtil::MakeShapeWithLayout( + F32, {7, 5}, {1, 0}))), + "%param = f32[5,7]{0,1} parameter(0) has incorrect shape " + "(expected: f32[7,5]{1,0})"); +} + +TEST(HloMatchersTest, ShardingMatcher) { + auto p0 = HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(F32, {5}), + "param.0"); + p0->clear_sharding(); + auto p1 = HloInstruction::CreateParameter(1, ShapeUtil::MakeShape(F32, {7}), + "param.1"); + p1->set_sharding(HloSharding::AssignDevice(1)); + + EXPECT_THAT(p0.get(), op::NoSharding()); + EXPECT_THAT(p0.get(), + ::testing::Not(op::Sharding(HloSharding::AssignDevice(1)))); + EXPECT_THAT(p1.get(), ::testing::Not(op::NoSharding())); + EXPECT_THAT(p1.get(), + ::testing::Not(op::Sharding(HloSharding::AssignDevice(0)))); + EXPECT_THAT(p1.get(), op::Sharding(HloSharding::AssignDevice(1))); + + EXPECT_THAT(Explain(p0.get(), op::Sharding(HloSharding::AssignDevice(1))), + "%param.0 = f32[5]{0} parameter(0) has no sharding (expected: " + "{maximal device=1})"); + EXPECT_THAT(Explain(p1.get(), op::NoSharding()), + "%param.1 = f32[7]{0} parameter(1), sharding={maximal device=1} " + "expected to have no sharding."); + EXPECT_THAT(Explain(p1.get(), op::Sharding(HloSharding::AssignDevice(0))), + "%param.1 = f32[7]{0} parameter(1), sharding={maximal device=1} " + "has incorrect sharding (expected: {maximal device=0})"); +} + } // namespace } // namespace xla -- GitLab From 
1ce99cfa52b19a40cff8a9ae983a0a7f04eb2bf1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 07:38:49 -0700 Subject: [PATCH 200/434] Softens the requirements in the HLO sharding validation The goal is to support tiled shardings where the last N tiles have no data. PiperOrigin-RevId: 194085302 --- .../compiler/xla/service/hlo_sharding.cc | 39 +++++++------------ .../compiler/xla/service/hlo_sharding_test.cc | 15 ++----- 2 files changed, 16 insertions(+), 38 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index 1b42349b0b..994de44123 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -256,37 +256,24 @@ Status HloSharding::ValidateNonTuple(const Shape& shape, ", input_shape=", ShapeUtil::HumanString(shape)); } - // The tile shape must not be the same as the input shape without maximal_ - // also set. If this is the case, we're not actually sharded and the correct - // constructor should have been used. - if (ShapeUtil::Equal(shape, tile_shape_)) { + // The correct constructor have to be used to create tile maximal shardings. + if (tile_assignment_.num_elements() == 1) { return tensorflow::errors::InvalidArgument( - "Tile shape is the same as the input shape. If a replicated sharding " - "was intended, use HloSharding::Replicated(). If a device placement " - "was intended, use HloSharding::AssignDevice()"); + "Tile assignment only contains a single device. If a replicated " + "sharding was intended, use HloSharding::Replicated(). If a device " + "placement was intended, use HloSharding::AssignDevice()"); } - // The tile shape must not be greater than the input shape in any dimension. 
- for (int64 i = 0, e = ShapeUtil::Rank(shape); i != e; ++i) { - auto tile_dim = tile_shape_.dimensions(i); - auto shape_dim = shape.dimensions(i); - if (tile_dim > shape_dim) { - return tensorflow::errors::InvalidArgument( - StrCat("Tile is larger than input shape (dimension ", i, ", ", - tile_dim, " > ", shape_dim)); - } - } - - // The tile assignment tensor must be exactly dimensioned to ceil(shape[dim] - // tile[dim]) for every dimension contained within tile. + // The tile assignment tensor must contain enough element to cover the full + // shape with tiles of the specified size. for (int64 i = 0, e = tile_assignment_.dimensions().size(); i != e; ++i) { - int64 expected_dim = - CeilOfRatio(shape.dimensions(i), tile_shape_.dimensions(i)); - if (tile_assignment_.dimensions()[i] != expected_dim) { + int64 total_tile_size = tile_assignment_.dim(i) * tile_shape_.dimensions(i); + if (shape.dimensions(i) > total_tile_size) { return tensorflow::errors::InvalidArgument( - StrCat("Tile assignment tensor has incorrect shape. Dimension ", i, - " expected ", expected_dim, " but got ", - tile_assignment_.dimensions()[i])); + StrCat("Tile assignment tensor has too few element to cover the full " + "shape. Dimension ", + i, ", shape ", shape.dimensions(i), ", total size ", + total_tile_size)); } } diff --git a/tensorflow/compiler/xla/service/hlo_sharding_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_test.cc index 69ea4233e4..3bf0d25efb 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_test.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_test.cc @@ -88,7 +88,7 @@ TEST_F(HloShardingTest, Tile) { } { - // Test should pass. + // Test should fail because of more devices used then `num_device`. Shape tile_shape = ShapeUtil::MakeShape(U32, {2, 3}); HloSharding sharding = HloSharding::Tile(tile_shape, MakeArray({2, 2}, {0, 1, 2, 3})); @@ -97,17 +97,8 @@ TEST_F(HloShardingTest, Tile) { } { - // Test should fail due to the tile being larger than the input space. 
- Shape tile_shape = ShapeUtil::MakeShape(U32, {2, 3}); - HloSharding sharding = - HloSharding::Tile(tile_shape, MakeArray({2, 2}, {0, 1, 2, 3})); - EXPECT_IS_NOT_OK(sharding.Validate(ShapeUtil::MakeShape(F32, {2, 2}), - /*num_devices=*/4)); - } - - { - // Test should fail due to the tile not dividing the input space into 4 - // sections (even with padding). + // Test should fail because the total tiled size in dimension 0 is 4 but we + // have 6 elements along that dimensions. Shape tile_shape = ShapeUtil::MakeShape(U32, {2, 3}); HloSharding sharding = HloSharding::Tile(tile_shape, MakeArray({2, 2}, {0, 1, 2, 3})); -- GitLab From 38b531ddfb1e2fd0afd765710e4416fd555b98ae Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 09:11:15 -0700 Subject: [PATCH 201/434] Internal Change PiperOrigin-RevId: 194096341 --- tensorflow/core/BUILD | 74 ++++++++++++++++--- .../core/platform/default/build_config.bzl | 49 +++++++++++- tensorflow/tensorflow.bzl | 33 +++++++-- tensorflow/tools/proto_text/BUILD | 7 +- .../proto_text/gen_proto_text_functions.cc | 6 +- 5 files changed, 146 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index ba1fd41565..843fd7b907 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -161,7 +161,7 @@ exports_files(["ops/ops.pbtxt"]) # Note that some protos are in neither additional_core_proto_srcs nor this # filegroup; e.g. ones with individual proto_library targets. 
# LINT.IfChange -CORE_PROTO_SRCS = [ +COMMON_PROTO_SRCS = [ "example/example.proto", "example/feature.proto", "framework/allocation_description.proto", @@ -189,7 +189,6 @@ CORE_PROTO_SRCS = [ "framework/types.proto", "framework/variable.proto", "framework/versions.proto", - "lib/core/error_codes.proto", "protobuf/config.proto", "protobuf/cluster.proto", "protobuf/debug.proto", @@ -202,8 +201,14 @@ CORE_PROTO_SRCS = [ "util/memmapped_file_system.proto", "util/saved_tensor_slice.proto", ] + +ERROR_CODES_PROTO_SRCS = [ + "lib/core/error_codes.proto", +] # LINT.ThenChange(//tensorflow/core/android_proto_config.asciipb) +CORE_PROTO_SRCS = COMMON_PROTO_SRCS + ERROR_CODES_PROTO_SRCS + # Protos which are not needed on mobile builds, but should be included in # protos_all. # @@ -224,12 +229,16 @@ ADDITIONAL_CORE_PROTO_SRCS = [ tf_proto_library( name = "protos_all", - srcs = CORE_PROTO_SRCS + ADDITIONAL_CORE_PROTO_SRCS, + srcs = [], cc_api_version = 2, default_header = True, j2objc_api_version = 1, java_api_version = 2, js_api_version = 2, + protodeps = [ + ":protos_all_proto", + ":error_codes_proto", + ], visibility = ["//visibility:public"], ) @@ -1134,7 +1143,8 @@ filegroup( filegroup( name = "mobile_srcs_no_runtime", srcs = [ - ":proto_text_srcs_all", + ":protos_all_proto_text_srcs", + ":error_codes_proto_text_srcs", "//tensorflow/core/platform/default/build_config:android_srcs", ] + glob( [ @@ -1930,15 +1940,58 @@ cc_library( ], ) -proto_text_hdrs_and_srcs = tf_generate_proto_text_sources( - name = "proto_text_srcs_all", - srcs = CORE_PROTO_SRCS, +tf_proto_library( + name = "error_codes_proto", + srcs = ERROR_CODES_PROTO_SRCS, + cc_api_version = 2, + default_header = True, + j2objc_api_version = 1, + java_api_version = 2, + js_api_version = 2, +) + +tf_generate_proto_text_sources( + name = "error_codes_proto_text", + srcs = ERROR_CODES_PROTO_SRCS, + protodeps = [], + srcs_relative_dir = "tensorflow/core/", + deps = [ + ":error_codes_proto_cc", + ":lib_internal", + ], +) 
+ +tf_proto_library( + name = "protos_all_proto", + srcs = COMMON_PROTO_SRCS + ADDITIONAL_CORE_PROTO_SRCS, + cc_api_version = 2, + default_header = True, + j2objc_api_version = 1, + java_api_version = 2, + js_api_version = 2, + protodeps = [ + ":error_codes_proto", + ], +) + +tf_generate_proto_text_sources( + name = "protos_all_proto_text", + srcs = COMMON_PROTO_SRCS, + protodeps = ERROR_CODES_PROTO_SRCS, srcs_relative_dir = "tensorflow/core/", + deps = [ + ":error_codes_proto_text", + ":lib_internal", + ":protos_all_proto_cc", + ], ) cc_library( name = "proto_text", - hdrs = proto_text_hdrs_and_srcs.hdrs, + hdrs = [ + ":error_codes_proto_text_hdrs", + ":protos_all_proto_text_hdrs", + ], deps = [ ":lib", ":lib_internal", @@ -2083,7 +2136,7 @@ tf_cuda_library( "util/memmapped_file_system.cc", "util/memmapped_file_system_writer.cc", ], - }) + proto_text_hdrs_and_srcs.srcs + tf_additional_framework_srcs(), + }) + tf_additional_framework_srcs(), hdrs = FRAMEWORK_INTERNAL_PUBLIC_HEADERS, copts = tf_copts(), linkopts = select({ @@ -2097,7 +2150,8 @@ tf_cuda_library( deps = [ ":lib", ":lib_internal", - ":proto_text", + ":protos_all_proto_text", + ":error_codes_proto_text", ":protos_all_cc", ":version_lib", "//tensorflow/core/platform/default/build_config:platformlib", diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 44356e3438..ca0587e277 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -319,10 +319,34 @@ def tf_proto_library_cc(name, srcs = [], has_services = None, use_grpc_plugin = None if cc_grpc_version: use_grpc_plugin = True + + cc_deps = tf_deps(protodeps, "_cc") + cc_name = name + "_cc" + if not srcs: + # This is a collection of sub-libraries. Build header-only and impl + # libraries containing all the sources. 
+ proto_gen( + name = cc_name + "_genproto", + deps = [s + "_genproto" for s in cc_deps], + protoc = "@protobuf_archive//:protoc", + visibility=["//visibility:public"], + ) + native.cc_library( + name = cc_name, + deps = cc_deps + ["@protobuf_archive//:protobuf_headers"] + + if_static([name + "_cc_impl"]), + ) + native.cc_library( + name = cc_name + "_impl", + deps = [s + "_impl" for s in cc_deps] + ["@protobuf_archive//:cc_wkt_protos"], + ) + + return + cc_proto_library( - name = name + "_cc", + name = cc_name, srcs = srcs, - deps = tf_deps(protodeps, "_cc") + ["@protobuf_archive//:cc_wkt_protos"], + deps = cc_deps + ["@protobuf_archive//:cc_wkt_protos"], cc_libs = cc_libs + if_static( ["@protobuf_archive//:protobuf"], ["@protobuf_archive//:protobuf_headers"] @@ -341,11 +365,28 @@ def tf_proto_library_cc(name, srcs = [], has_services = None, def tf_proto_library_py(name, srcs=[], protodeps=[], deps=[], visibility=[], testonly=0, srcs_version="PY2AND3", use_grpc_plugin=False): + py_deps = tf_deps(protodeps, "_py") + py_name = name + "_py" + if not srcs: + # This is a collection of sub-libraries. Build header-only and impl + # libraries containing all the sources. 
+ proto_gen( + name = py_name + "_genproto", + deps = [s + "_genproto" for s in py_deps], + protoc = "@protobuf_archive//:protoc", + visibility=["//visibility:public"], + ) + native.py_library( + name = py_name, + deps = py_deps + ["@protobuf_archive//:protobuf_python"]) + + return + py_proto_library( - name = name + "_py", + name = py_name, srcs = srcs, srcs_version = srcs_version, - deps = deps + tf_deps(protodeps, "_py") + ["@protobuf_archive//:protobuf_python"], + deps = deps + py_deps + ["@protobuf_archive//:protobuf_python"], protoc = "@protobuf_archive//:protoc", default_runtime = "@protobuf_archive//:protobuf_python", visibility = visibility, diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 51e856bed0..a9ddd4fc60 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -37,20 +37,25 @@ def src_to_test_name(src): def full_path(relative_paths): return [native.package_name() + "/" + relative for relative in relative_paths] +def _add_tfcore_prefix(src): + if src.startswith("//"): + return src + return "//tensorflow/core:" + src + # List of proto files for android builds def tf_android_core_proto_sources(core_proto_sources_relative): return [ - "//tensorflow/core:" + p for p in core_proto_sources_relative + _add_tfcore_prefix(p) for p in core_proto_sources_relative ] # Returns the list of pb.h and proto.h headers that are generated for # tf_android_core_proto_sources(). def tf_android_core_proto_headers(core_proto_sources_relative): return ([ - "//tensorflow/core/" + p.replace(".proto", ".pb.h") + _add_tfcore_prefix(p).replace(":", "/").replace(".proto", ".pb.h") for p in core_proto_sources_relative ] + [ - "//tensorflow/core/" + p.replace(".proto", ".proto.h") + _add_tfcore_prefix(p).replace(":", "/").replace(".proto", ".proto.h") for p in core_proto_sources_relative ]) @@ -1672,22 +1677,36 @@ def cuda_py_tests(name, # # Return a struct with fields (hdrs, srcs) containing the names of the # generated files. 
-def tf_generate_proto_text_sources(name, srcs_relative_dir, srcs): +def tf_generate_proto_text_sources(name, srcs_relative_dir, srcs, protodeps=[], deps=[], visibility=None): out_hdrs = ( [p.replace(".proto", ".pb_text.h") for p in srcs] + [p.replace(".proto", ".pb_text-impl.h") for p in srcs]) out_srcs = [p.replace(".proto", ".pb_text.cc") for p in srcs] native.genrule( - name=name, - srcs=srcs + [clean_dep("//tensorflow/tools/proto_text:placeholder.txt")], + name=name + "_srcs", + srcs=srcs + protodeps + [clean_dep("//tensorflow/tools/proto_text:placeholder.txt")], outs=out_hdrs + out_srcs, + visibility=visibility, cmd= "$(location //tensorflow/tools/proto_text:gen_proto_text_functions) " + "$(@D) " + srcs_relative_dir + " $(SRCS)", tools=[ clean_dep("//tensorflow/tools/proto_text:gen_proto_text_functions") ],) - return struct(hdrs=out_hdrs, srcs=out_srcs) + + native.filegroup( + name=name + "_hdrs", + srcs=out_hdrs, + visibility=visibility, + ) + + native.cc_library( + name=name, + srcs=out_srcs, + hdrs=out_hdrs, + visibility=visibility, + deps = deps, + ) def tf_genrule_cmd_append_to_srcs(to_append): return ("cat $(SRCS) > $(@) && " + "echo >> $(@) && " + "echo " + to_append + diff --git a/tensorflow/tools/proto_text/BUILD b/tensorflow/tools/proto_text/BUILD index ef7bfdd3c9..31e8fb9120 100644 --- a/tensorflow/tools/proto_text/BUILD +++ b/tensorflow/tools/proto_text/BUILD @@ -75,9 +75,14 @@ tf_proto_library_cc( ) tf_generate_proto_text_sources( - name = "test_proto_text_srcs", + name = "test_proto_text", srcs = ["test.proto"], srcs_relative_dir = "tensorflow/tools/proto_text/", + deps = [ + ":test_proto_cc", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], ) tf_cc_test( diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions.cc b/tensorflow/tools/proto_text/gen_proto_text_functions.cc index f0bb59acf8..234afe879b 100644 --- a/tensorflow/tools/proto_text/gen_proto_text_functions.cc +++ 
b/tensorflow/tools/proto_text/gen_proto_text_functions.cc @@ -130,7 +130,11 @@ int MainImpl(int argc, char** argv) { const string path = output_root + "/" + proto_path_no_suffix + suffix; FILE* f = fopen(path.c_str(), "w"); - if (f == nullptr) return -1; + if (f == nullptr) { + // We don't expect this output to be generated. It was specified in the + // list of sources solely to satisfy a proto import dependency. + continue; + } if (fwrite(data.c_str(), 1, data.size(), f) != data.size()) { fclose(f); return -1; -- GitLab From b7f957ceedb6f47e4d68c506389bff210c35ef6a Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Tue, 24 Apr 2018 09:15:07 -0700 Subject: [PATCH 202/434] Add S64 clamp test. PiperOrigin-RevId: 194096814 --- .../compiler/xla/tests/vector_ops_simple_test.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc b/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc index 697d78fe6e..8b86b5e760 100644 --- a/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc +++ b/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc @@ -348,6 +348,17 @@ XLA_TEST_F(VecOpsSimpleTest, ClampTenValuesConstantNonzeroLower) { ComputeAndCompareR1(&builder, expected, {}); } +XLA_TEST_F(VecOpsSimpleTest, ClampValuesConstantS64) { + ComputationBuilder builder(client_, TestName()); + auto zero = builder.ConstantR0(0); + auto one = builder.ConstantR0(10); + auto x = builder.ConstantR1({-3, 3, 9, 13}); + auto clamp = builder.Clamp(zero, x, one); + + std::vector expected = {0, 3, 9, 10}; + ComputeAndCompareR1(&builder, expected, {}); +} + XLA_TEST_F(VecOpsSimpleTest, MapTenValues) { Computation add_half; { -- GitLab From cfedd67f5881ae3697638e9b74eccb7da9818a0e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 09:44:52 -0700 Subject: [PATCH 203/434] Add an attr to apply_adagrad op that allows it to skip updating the accumulators. 
PiperOrigin-RevId: 194100678 --- tensorflow/core/kernels/training_ops.cc | 23 ++++++++++++++----- tensorflow/core/kernels/training_ops.h | 2 +- .../core/kernels/training_ops_gpu.cu.cc | 6 +++-- tensorflow/core/ops/training_ops.cc | 4 ++++ 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 5b13b10937..271329599f 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -153,8 +153,10 @@ struct ApplyAdagrad { void operator()(const CPUDevice& d, typename TTypes::Flat var, typename TTypes::Flat accum, typename TTypes::ConstScalar lr, - typename TTypes::ConstFlat grad) { - accum.device(d) += grad.square(); + typename TTypes::ConstFlat grad, bool update_slots) { + if (update_slots) { + accum.device(d) += grad.square(); + } var.device(d) -= grad * lr() * accum.rsqrt(); } }; @@ -1074,6 +1076,7 @@ class ApplyAdagradOp : public OpKernel { public: explicit ApplyAdagradOp(OpKernelConstruction* ctx) : OpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("update_slots", &update_slots_)); } void Compute(OpKernelContext* ctx) override { @@ -1111,13 +1114,15 @@ class ApplyAdagradOp : public OpKernel { const Device& device = ctx->template eigen_device(); functor::ApplyAdagrad()(device, var.flat(), accum.flat(), - lr.scalar(), grad.flat()); + lr.scalar(), grad.flat(), + update_slots_); MaybeForwardRefInputToRefOutput(ctx, 0, 0); } private: bool use_exclusive_lock_; + bool update_slots_; }; #define REGISTER_KERNELS(D, T) \ @@ -1145,7 +1150,7 @@ namespace functor { void ApplyAdagrad::operator()( \ const GPUDevice& d, typename TTypes::Flat var, \ typename TTypes::Flat accum, typename TTypes::ConstScalar lr, \ - typename TTypes::ConstFlat grad); \ + typename TTypes::ConstFlat grad, bool update_slots); \ extern template struct ApplyAdagrad; DECLARE_GPU_SPEC(Eigen::half); 
DECLARE_GPU_SPEC(float); @@ -1266,6 +1271,7 @@ class SparseApplyAdagradOp : public OpKernel { public: explicit SparseApplyAdagradOp(OpKernelConstruction* ctx) : OpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("update_slots", &update_slots_)); } void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS { @@ -1339,7 +1345,9 @@ class SparseApplyAdagradOp : public OpKernel { auto a = accum_flat.template chip<0>(index); auto g = grad_flat.template chip<0>(i); auto v = var_flat.template chip<0>(index); - a += g.square(); + if (update_slots_) { + a += g.square(); + } v -= g.constant(lr_scalar) * g * a.rsqrt(); } } else { @@ -1358,7 +1366,9 @@ class SparseApplyAdagradOp : public OpKernel { " in indices is out of range"))); T& a = accum_flat(index); const T& g = grad_flat(i); - a += g * g; + if (update_slots_) { + a += g * g; + } var_flat(index) -= lr_scalar * g / Eigen::numext::sqrt(a); } } @@ -1369,6 +1379,7 @@ class SparseApplyAdagradOp : public OpKernel { private: bool use_exclusive_lock_; + bool update_slots_; }; #define REGISTER_KERNELS(T, Tindices) \ diff --git a/tensorflow/core/kernels/training_ops.h b/tensorflow/core/kernels/training_ops.h index f536a61eb0..495a94f1a1 100644 --- a/tensorflow/core/kernels/training_ops.h +++ b/tensorflow/core/kernels/training_ops.h @@ -68,7 +68,7 @@ struct ApplyAdagrad { void operator()(const Device& d, typename TTypes::Flat var, typename TTypes::Flat accum, typename TTypes::ConstScalar lr, - typename TTypes::ConstFlat grad); + typename TTypes::ConstFlat grad, bool update_slots); }; template diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index 2aa17f2a0f..4bd32592db 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -42,8 +42,10 @@ struct ApplyAdagrad { void operator()(const GPUDevice& d, typename TTypes::Flat var, typename 
TTypes::Flat accum, typename TTypes::ConstScalar lr, - typename TTypes::ConstFlat grad) { - accum.device(d) += grad.square(); + typename TTypes::ConstFlat grad, bool update_slots) { + if (update_slots) { + accum.device(d) += grad.square(); + } Eigen::array::Tensor::Index, 1> bcast; bcast[0] = grad.dimension(0); Eigen::Sizes<1> single; diff --git a/tensorflow/core/ops/training_ops.cc b/tensorflow/core/ops/training_ops.cc index dc7b588898..94ff092a85 100644 --- a/tensorflow/core/ops/training_ops.cc +++ b/tensorflow/core/ops/training_ops.cc @@ -253,6 +253,7 @@ REGISTER_OP("ApplyAdagrad") .Output("out: Ref(T)") .Attr("T: numbertype") .Attr("use_locking: bool = false") + .Attr("update_slots: bool = true") .SetShapeFn([](InferenceContext* c) { return ApplyAdagradShapeFn(c, false /* sparse */); }); @@ -264,6 +265,7 @@ REGISTER_OP("ResourceApplyAdagrad") .Input("grad: T") .Attr("T: numbertype") .Attr("use_locking: bool = false") + .Attr("update_slots: bool = true") .SetShapeFn([](InferenceContext* c) { return ApplyAdagradShapeFn(c, false /* sparse */); }); @@ -320,6 +322,7 @@ REGISTER_OP("SparseApplyAdagrad") .Attr("T: numbertype") .Attr("Tindices: {int32, int64}") .Attr("use_locking: bool = false") + .Attr("update_slots: bool = true") .SetShapeFn([](InferenceContext* c) { return ApplyAdagradShapeFn(c, true /* sparse */); }); @@ -333,6 +336,7 @@ REGISTER_OP("ResourceSparseApplyAdagrad") .Attr("T: numbertype") .Attr("Tindices: {int32, int64}") .Attr("use_locking: bool = false") + .Attr("update_slots: bool = true") .SetShapeFn([](InferenceContext* c) { return ApplyAdagradShapeFn(c, true /* sparse */); }); -- GitLab From 9c7e819352581bf5a97509b1fa5dc71dffa26500 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 10:24:26 -0700 Subject: [PATCH 204/434] Enable all arithmetic optimizations by default. 
PiperOrigin-RevId: 194106835 --- .../core/grappler/optimizers/arithmetic_optimizer.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index c0fe8839ca..344c8281eb 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -57,9 +57,9 @@ class ArithmeticOptimizer : public GraphOptimizer { // TODO(ezhulenev): flag do disable TrySimplifyAndReplaceUses in tests. // Remove when all optimizers will be migrated to separate stages. bool enable_try_simplify_and_replace = true; - bool combine_add_to_addn = false; + bool combine_add_to_addn = true; bool hoist_common_factor_out_of_aggregation = true; - bool minimize_broadcasts = false; + bool minimize_broadcasts = true; bool remove_identity_transpose = true; bool remove_redundant_bitcast = true; bool remove_redundant_cast = true; @@ -70,11 +70,6 @@ class ArithmeticOptimizer : public GraphOptimizer { static ArithmeticOptimizerOptions Default( RewriterConfig::Toggle opt_level) { ArithmeticOptimizerOptions options; - // TODO(ezhulenev): enable by default after 1.8 release cut - if (opt_level == RewriterConfig::AGGRESSIVE) { - options.combine_add_to_addn = true; - options.minimize_broadcasts = true; - } return options; } }; -- GitLab From 55a4a479df8e1fbc8aa726596e6d4591364b3585 Mon Sep 17 00:00:00 2001 From: Sherry Moore Date: Tue, 24 Apr 2018 10:31:17 -0700 Subject: [PATCH 205/434] Added a call in CheckpointSaverHook.after_create_session to always save checkpoint before the first training step. 
PiperOrigin-RevId: 194107958 --- .../python/learn/estimators/estimator_test.py | 4 +- tensorflow/python/estimator/estimator_test.py | 4 +- .../training/basic_session_run_hooks.py | 36 ++++++++++-------- .../training/basic_session_run_hooks_test.py | 38 ++++++++++++++++--- 4 files changed, 58 insertions(+), 24 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py index d81a534b79..9e5aaf3118 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py @@ -715,7 +715,9 @@ class EstimatorTest(test.TestCase): ckpt = checkpoint_state_pb2.CheckpointState() text_format.Merge(checkpoint_file_content, ckpt) self.assertEqual(ckpt.model_checkpoint_path, 'model.ckpt-5') - self.assertAllEqual(['model.ckpt-1', 'model.ckpt-5'], + # TODO(b/78461127): Please modify tests to not directly rely on names of + # checkpoints. + self.assertAllEqual(['model.ckpt-0', 'model.ckpt-5'], ckpt.all_model_checkpoint_paths) def test_train_save_copy_reload(self): diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index d453e19357..0fea86124c 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -679,8 +679,10 @@ class EstimatorTrainTest(test.TestCase): ckpt = checkpoint_state_pb2.CheckpointState() text_format.Merge(checkpoint_file_content, ckpt) self.assertEqual(ckpt.model_checkpoint_path, 'model.ckpt-5') + # TODO(b/78461127): Please modify tests to not directly rely on names of + # checkpoints. 
self.assertAllEqual( - ['model.ckpt-1', 'model.ckpt-5'], ckpt.all_model_checkpoint_paths) + ['model.ckpt-0', 'model.ckpt-5'], ckpt.all_model_checkpoint_paths) def test_train_save_copy_reload(self): tmpdir = tempfile.mkdtemp() diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py index 3651291bdf..47339e057f 100644 --- a/tensorflow/python/training/basic_session_run_hooks.py +++ b/tensorflow/python/training/basic_session_run_hooks.py @@ -434,23 +434,27 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): for l in self._listeners: l.begin() - def before_run(self, run_context): # pylint: disable=unused-argument - if self._timer.last_triggered_step() is None: - # We do write graph and saver_def at the first call of before_run. - # We cannot do this in begin, since we let other hooks to change graph and - # add variables in begin. Graph is finalized after all begin calls. - training_util.write_graph( - ops.get_default_graph().as_graph_def(add_shapes=True), - self._checkpoint_dir, - "graph.pbtxt") - saver_def = self._get_saver().saver_def if self._get_saver() else None - graph = ops.get_default_graph() - meta_graph_def = meta_graph.create_meta_graph_def( - graph_def=graph.as_graph_def(add_shapes=True), - saver_def=saver_def) - self._summary_writer.add_graph(graph) - self._summary_writer.add_meta_graph(meta_graph_def) + def after_create_session(self, session, coord): + global_step = session.run(self._global_step_tensor) + # We write graph and saver_def in after_create_session. + # We cannot do this in begin, since we let other hooks change the graph and + # add variables in begin. Graph is finalized after all begin calls.
+ training_util.write_graph( + ops.get_default_graph().as_graph_def(add_shapes=True), + self._checkpoint_dir, + "graph.pbtxt") + saver_def = self._get_saver().saver_def if self._get_saver() else None + graph = ops.get_default_graph() + meta_graph_def = meta_graph.create_meta_graph_def( + graph_def=graph.as_graph_def(add_shapes=True), + saver_def=saver_def) + self._summary_writer.add_graph(graph) + self._summary_writer.add_meta_graph(meta_graph_def) + # The checkpoint saved here is the state at step "global_step". + self._save(session, global_step) + self._timer.update_last_triggered_step(global_step) + def before_run(self, run_context): # pylint: disable=unused-argument return SessionRunArgs(self._global_step_tensor) def after_run(self, run_context, run_values): diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py index 25962f6bf7..31898562f8 100644 --- a/tensorflow/python/training/basic_session_run_hooks_test.py +++ b/tensorflow/python/training/basic_session_run_hooks_test.py @@ -466,8 +466,8 @@ class CheckpointSaverHookTest(test.TestCase): self.assertEqual(2, global_step_val) self.assertEqual({ 'begin': 1, - 'before_save': 2, - 'after_save': 2, + 'before_save': 3, + 'after_save': 3, 'end': 1 }, listener_counts) @@ -490,8 +490,8 @@ class CheckpointSaverHookTest(test.TestCase): self.assertEqual(2, global_step_val) self.assertEqual({ 'begin': 1, - 'before_save': 2, - 'after_save': 2, + 'before_save': 3, + 'after_save': 3, 'end': 1 }, listener_counts) @@ -523,8 +523,8 @@ class CheckpointSaverHookTest(test.TestCase): self.assertEqual(2, global_step_val) self.assertEqual({ 'begin': 1, - 'before_save': 2, - 'after_save': 2, + 'before_save': 3, + 'after_save': 3, 'end': 1 }, listener1_counts) self.assertEqual(listener1_counts, listener2_counts) @@ -706,6 +706,7 @@ class CheckpointSaverHookTest(test.TestCase): with session_lib.Session() as sess: sess.run(self.scaffold.init_op) mon_sess = 
monitored_session._HookedSession(sess, [hook]) + hook.after_create_session(sess, None) mon_sess.run(self.train_op) summary_writer.assert_summaries( test_case=self, @@ -718,6 +719,31 @@ class CheckpointSaverHookTest(test.TestCase): fake_summary_writer.FakeSummaryWriter.uninstall() + def test_save_checkpoint_before_first_train_step(self): + with self.graph.as_default(): + hook = basic_session_run_hooks.CheckpointSaverHook( + self.model_dir, save_steps=2, scaffold=self.scaffold) + hook.begin() + self.scaffold.finalize() + with session_lib.Session() as sess: + mon_sess = monitored_session._HookedSession(sess, [hook]) + sess.run(self.scaffold.init_op) + hook.after_create_session(sess, None) + # Verifies that checkpoint is saved at step 0. + self.assertEqual(0, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + # Verifies that no checkpoint is saved after one training step. + mon_sess.run(self.train_op) + self.assertEqual(0, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + # Verifies that checkpoint is saved after save_steps. + mon_sess.run(self.train_op) + self.assertEqual(2, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + class CheckpointSaverHookMultiStepTest(test.TestCase): -- GitLab From f6ae3d54b0700ba76b56ebe3c702440f39460d2e Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Tue, 24 Apr 2018 10:51:08 -0700 Subject: [PATCH 206/434] Split gpu_id library to a header library and an implementation, so when if_static is false and we're building shared objects that depend on gpu_id, the implementation won't get linked. 
PiperOrigin-RevId: 194111330 --- tensorflow/core/BUILD | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 843fd7b907..bda87c6aed 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2563,6 +2563,19 @@ tf_cuda_library( cc_library( name = "gpu_id", + hdrs = [ + "common_runtime/gpu/gpu_id.h", + "common_runtime/gpu/gpu_id_manager.h", + ], + deps = [ + ":lib", + ] + if_static([ + ":gpu_id_impl", + ]), +) + +cc_library( + name = "gpu_id_impl", srcs = ["common_runtime/gpu/gpu_id_manager.cc"], hdrs = [ "common_runtime/gpu/gpu_id.h", @@ -2612,7 +2625,7 @@ tf_cuda_library( ":core_cpu_lib", ":framework", ":framework_internal", - ":gpu_id", + ":gpu_id_impl", ":gpu_init_impl", ":gpu_lib", ":graph", -- GitLab From 09398096284995d8a93c124bdbd70d6e1a44fbc3 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 24 Apr 2018 10:59:10 -0700 Subject: [PATCH 207/434] Update README.md --- tensorflow/tools/docker/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index f46c56e11a..525f2995ce 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -16,12 +16,12 @@ quick links here: We currently maintain two Docker container images: -* `gcr.io/tensorflow/tensorflow` - TensorFlow with all dependencies - CPU only! +* `tensorflow/tensorflow` - TensorFlow with all dependencies - CPU only! -* `gcr.io/tensorflow/tensorflow:latest-gpu` - TensorFlow with all dependencies +* `tensorflow/tensorflow:latest-gpu` - TensorFlow with all dependencies and support for NVidia CUDA -Note: We also publish the same containers into +Note: We store all our containers on [Docker Hub](https://hub.docker.com/r/tensorflow/tensorflow/tags/). 
@@ -29,12 +29,12 @@ Note: We also publish the same containers into Run non-GPU container using - $ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow + $ docker run -it -p 8888:8888 tensorflow/tensorflow For GPU support install NVidia drivers (ideally latest) and [nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using - $ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:latest-gpu + $ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:latest-gpu Note: If you would have a problem running nvidia-docker you may try the old method @@ -44,7 +44,7 @@ it there and try using nvidia-docker as described above. $ # The old, not recommended way to run docker with gpu support: $ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu + $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES tensorflow/tensorflow:latest-gpu ## More containers -- GitLab From b7b7ec32b848d6f5a7cf432fb44ceed4c9587078 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 24 Apr 2018 10:57:00 -0700 Subject: [PATCH 208/434] Add note that setting LD_LIBRARY_PATH after having already kicked off a build requires a clean rebuild. PiperOrigin-RevId: 194112367 --- tensorflow/docs_src/install/install_sources.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index b186758653..71f066e4cb 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -393,9 +393,9 @@ If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Star If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). 
-## Common installation problems +## Common build and installation problems -The installation problems you encounter typically depend on the +The build and installation problems you encounter typically depend on the operating system. See the "Common installation problems" section of one of the following guides: @@ -448,6 +448,11 @@ Stack Overflow and specify the `tensorflow` tag. + + + + +
Stack Overflow Link Error Message
Link to GitHub or Stack Overflow Error Message
36159194
47080760
undefined reference to `cublasGemmEx@libcublas.so.9.0'
## Tested source configurations -- GitLab From 052c53c27956251e4b4952cd862596a9c08584e4 Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Tue, 24 Apr 2018 11:09:09 -0700 Subject: [PATCH 209/434] Review fixes to install_linux --- tensorflow/docs_src/install/install_linux.md | 119 +++++++++++++------ 1 file changed, 84 insertions(+), 35 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 9b431e49ee..fa82ac9c40 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -116,23 +116,47 @@ There are a few options to install TensorFlow on your machine: ### Use `pip` in a virtual environment -This is the *recommended* install method. The -[Virtualenv](https://virtualenv.pypa.io/en/stable/) tool creates virtual Python -environments that are isolated from other Python development on the same machine. -In this scenario, you install TensorFlow and its dependencies within a virtual -environment that is available when *activated*. Virtualenv provides a reliable -way to install and run TensorFlow while avoiding conflicts with the rest of the -system. +Key Point: Using a virtual environment is the recommended install method. -1\. On Ubuntu, install the `pip` and `virtualenv` packages: +The [Virtualenv](https://virtualenv.pypa.io/en/stable/) tool creates virtual +Python environments that are isolated from other Python development on the same +machine. In this scenario, you install TensorFlow and its dependencies within a +virtual environment that is available when *activated*. Virtualenv provides a +reliable way to install and run TensorFlow while avoiding conflicts with the rest +of the system. + +##### 1. Install Python, `pip`, and `virtualenv`. + +On Ubuntu, Python is automatically installed and `pip` is *usually* installed. +Confirm the `python` and `pip` versions: + +
+  python -V
+  pip -V  # or: pip3 -V
+
+ +To install these packages on Ubuntu:
   sudo apt-get install python-pip python-dev python-virtualenv   # for Python 2.7
   sudo apt-get install python3-pip python3-dev python-virtualenv # for Python 3.n
 
-2\. Create a directory for the virtual environment and choose a Python -interpreter: +We *recommend* using `pip` version 8.1 or higher. If using a release before +version 8.1, upgrade `pip`: + +
+  sudo pip install -U pip
+
+ +If not using Ubuntu and [setuptools](https://pypi.org/project/setuptools/) is +installed, use `easy_install` to install `pip`: + +
+  easy_install -U pip
+
+ +##### 2. Create a directory for the virtual environment and choose a Python interpreter.
   mkdir ~/tensorflow  # somewhere to work out of
@@ -142,7 +166,9 @@ interpreter:
   virtualenv --system-site-packages -p python3 venv # Use Python 3.n
 
-3\. Activate the Virtualenv environment using one of these shell commands: +##### 3. Activate the Virtualenv environment. + +Use one of these shell-specific commands to activate the virtual environment:
   source ~/tensorflow/venv/bin/activate      # bash, sh, ksh, or zsh
@@ -152,26 +178,32 @@ interpreter:
 
 When the Virtualenv is activated, the shell prompt displays as `(venv) $`.
 
-4\. Upgrade `pip` in your virtual environment:
+##### 4. Upgrade `pip` in the virtual environment.
 
-See the [pip installation guide](https://pip.pypa.io/en/stable/installing/) for
-instructions, or use `easy_install`:
+Within the active virtual environment, upgrade `pip`:
 
 
-(venv)$ easy_install -U pip
+(venv)$ pip install -U pip
 
-5\. Within an active Virtualenv environment, use one of the following `pip` -commands to install the TensorFlow package: +You can install other Python packages within the virtual environment without +affecting packages outside the `virtualenv`. + +##### 5. Install TensorFlow in the virtual environment. + +Choose one of the available TensorFlow packages for installation: + +* `tensorflow` —Current release for CPU +* `tensorflow-gpu` —Current release with GPU support +* `tf-nightly` —Nightly build for CPU +* `tf-nightly-gpu` —Nightly build with GPU support + +Within an active Virtualenv environment, use `pip` to install the package:
-(venv)$ pip install --upgrade tensorflow      # for Python 2.7
-(venv)$ pip3 install --upgrade tensorflow     # for Python 3.n
-(venv)$ pip install --upgrade tensorflow-gpu  # for Python 2.7 and GPU
+  pip install -U tensorflow
 
-Success! TensorFlow is now installed. - Use `pip list` to show the packages installed in the virtual environment. [Validate the install](#ValidateYourInstallation) and test the version: @@ -179,6 +211,8 @@ Use `pip list` to show the packages installed in the virtual environment. (venv)$ python -c "import tensorflow as tf; print(tf.__version__)"
+Success: TensorFlow is now installed. + Use the `deactivate` command to stop the Python virtual environment. #### Problems @@ -222,10 +256,9 @@ environment, a system `pip` install is straightforward. See the [REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py) -for a list of TensorFlow packages that `pip` installs or upgrade`. +for a list of packages that TensorFlow installs. - -#### Install Python and `pip` +##### 1. Install Python and `pip`. On Ubuntu, Python is automatically installed and `pip` is *usually* installed. Confirm the `python` and `pip` versions: @@ -235,28 +268,42 @@ Confirm the `python` and `pip` versions: pip -V # or: pip3 -V -We *strongly recommend* `pip` or `pip3` version 8.1 or higher. If using a release -before version 8.1, upgrade `pip`: +To install these packages on Ubuntu:
   sudo apt-get install python-pip python-dev   # for Python 2.7
   sudo apt-get install python3-pip python3-dev # for Python 3.n
 
+We *recommend* using `pip` version 8.1 or higher. If using a release before +version 8.1, upgrade `pip`: -#### Install TensorFlow +
+  sudo pip install -U pip
+
-Install one of the available TensorFlow packages: +If not using Ubuntu and [setuptools](https://pypi.org/project/setuptools/) is +installed, use `easy_install` to install `pip`:
-  # Select one:
-  sudo pip install tensorflow      # Python 2.7 CPU (no GPU support)
-  sudo pip3 install tensorflow     # Python 3.n CPU (no GPU support)
-  sudo pip install tensorflow-gpu  # Python 2.7 GPU support
-  sudo pip3 install tensorflow-gpu # Python 3.n GPU support
+  easy_install -U pip
 
-Success! TensorFlow is now installed. +##### 2. Install TensorFlow on the system. + +Choose one of the available TensorFlow packages for installation: + +* `tensorflow` — Current release for CPU +* `tensorflow-gpu` — Current release with GPU support +* `tf-nightly` — Nightly build for CPU +* `tf-nightly-gpu` — Nightly build with GPU support + +And use `pip` to install the package for Python 2 or 3: + +
+  sudo pip install -U tensorflow   # Python 2.7
+  sudo pip3 install -U tensorflow  # Python 3.n
+
Use `pip list` to show the packages installed on the system. [Validate the install](#ValidateYourInstallation) and test the version: @@ -265,6 +312,8 @@ Use `pip list` to show the packages installed on the system. python -c "import tensorflow as tf; print(tf.__version__)" +Success: TensorFlow is now installed. + #### Problems If the above steps failed, try installing the TensorFlow binary using the remote -- GitLab From aeaec69869f13fc37c3ed28881741dd344e6a150 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 11:18:47 -0700 Subject: [PATCH 210/434] Update ops-related pbtxt files. PiperOrigin-RevId: 194116315 --- .../core/ops/compat/ops_history.v1.pbtxt | 276 ++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 28 ++ 2 files changed, 304 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 05dee30ca0..701897f162 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -2121,6 +2121,71 @@ op { } } } +op { + name: "ApplyAdagrad" + input_arg { + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + output_arg { + name: "out" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + attr { + name: "update_slots" + type: "bool" + default_value { + b: true + } + } +} op { name: "ApplyAdagradDA" input_arg { @@ 
-43524,6 +43589,65 @@ op { } is_stateful: true } +op { + name: "ResourceApplyAdagrad" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + attr { + name: "update_slots" + type: "bool" + default_value { + b: true + } + } + is_stateful: true +} op { name: "ResourceApplyAdagradDA" input_arg { @@ -47876,6 +48000,79 @@ op { } is_stateful: true } +op { + name: "ResourceSparseApplyAdagrad" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + attr { + name: "update_slots" + type: "bool" + default_value { + b: true + } + } + is_stateful: true +} op { name: "ResourceSparseApplyAdagradDA" 
input_arg { @@ -58622,6 +58819,85 @@ op { } } } +op { + name: "SparseApplyAdagrad" + input_arg { + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + output_arg { + name: "out" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + attr { + name: "update_slots" + type: "bool" + default_value { + b: true + } + } +} op { name: "SparseApplyAdagradDA" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 2edd15c446..eb43c6fdfb 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -891,6 +891,13 @@ op { b: false } } + attr { + name: "update_slots" + type: "bool" + default_value { + b: true + } + } } op { name: "ApplyAdagradDA" @@ -21784,6 +21791,13 @@ op { b: false } } + attr { + name: "update_slots" + type: "bool" + default_value { + b: true + } + } is_stateful: true } op { @@ -23150,6 +23164,13 @@ op { b: false } } + attr { + name: "update_slots" + type: "bool" + default_value { + b: true + } + } is_stateful: true } op { @@ -27187,6 +27208,13 @@ op { b: false } } + attr { + name: "update_slots" + type: "bool" + default_value { + b: true + } + } } op { name: "SparseApplyAdagradDA" -- GitLab From 
4a82acf286df1bc10581d91e13e0ab17458e83b4 Mon Sep 17 00:00:00 2001 From: Raghuraman Krishnamoorthi Date: Tue, 24 Apr 2018 11:20:04 -0700 Subject: [PATCH 211/434] Improve handling of scopes in folding unfused batch norms. This change allows folding to work for MobilenetV2 with unfused batch norms PiperOrigin-RevId: 194116535 --- .../quantize/python/fold_batch_norms.py | 24 +++++- .../quantize/python/fold_batch_norms_test.py | 79 +++++++++++++++++++ 2 files changed, 100 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index aa0ef64308..6f41722748 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -501,8 +501,27 @@ def _GetBatchNormParams(graph, context, has_scaling): bn_decay_var_tensor = None split_context = context.split('/') - base_context = split_context[-1] - + # Matching variable names is brittle and relies on scoping + # conventions. Fused batch norm folding is more robust. Support for unfused + # batch norms will be deprecated as we move forward. Fused batch norms allow + # for faster training and should be used whenever possible. + # context contains part of the names of the tensors we are interested in: + # For MobilenetV1, the context has repetitions: + # MobilenetV1/MobilenetV1/Conv2d_3_depthwise + # when the moving_mean tensor has the name: + # MobilenetV1/Conv2d_3_depthwise/BatchNorm/moving_mean/read + # To pick the correct variable name, it is necessary to ignore the repeating + # header. 
+ + # For MobilenetV2, this problem does not exist: + # The context is: MobilenetV2/expanded_conv_3/depthwise + # and the names of the tensors start with a single MobilenetV2 + # The moving mean for example, has the name: + # MobilenetV2/expanded_conv_3/depthwise/BatchNorm/moving_mean/read + # We ignore the first string (MobilenetV1 or MobilenetV2) + # in the context to match correctly in both cases + + base_context = '/'.join(split_context[1:]) oplist = graph.get_operations() op_suffix_mean = base_context + '/BatchNorm/moments/Squeeze' op_suffix_variance = base_context + '/BatchNorm/moments/Squeeze_1' @@ -520,7 +539,6 @@ def _GetBatchNormParams(graph, context, has_scaling): op_suffix_gamma = base_context + '/BatchNorm/gamma' op_suffix_moving_variance = base_context + '/BatchNorm/moving_variance/read' op_suffix_moving_mean = base_context + '/BatchNorm/moving_mean/read' - # Parse through list of ops to find relevant ops for op in oplist: if op.name.endswith(op_suffix_mean): diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py index af31467476..64e8142e7c 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py @@ -134,6 +134,85 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): def testFoldConv2d(self): self._RunTestOverParameters(self._TestFoldConv2d) + def testMultipleLayerConv2d(self, + relu=nn_ops.relu, + relu_op_name='Relu', + has_scaling=True, + fused_batch_norm=False, + freeze_batch_norm_delay=None): + """Tests folding cases for a network with multiple layers. + + Args: + relu: Callable that returns an Operation, a factory method for the Relu*. + relu_op_name: String, name of the Relu* operation. + has_scaling: Bool, when true the batch norm has scaling. + fused_batch_norm: Bool, when true the batch norm is fused. 
+ freeze_batch_norm_delay: None or the number of steps after which training + switches to using frozen mean and variance + """ + g = ops.Graph() + with g.as_default(): + batch_size, height, width = 5, 128, 128 + inputs = array_ops.zeros((batch_size, height, width, 3)) + out_depth = 3 + stride = 1 + activation_fn = relu + scope = 'network/expanded_conv_1/conv' + layer1 = conv2d( + inputs, + out_depth, [5, 5], + stride=stride, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=activation_fn, + normalizer_fn=batch_norm, + normalizer_params=self._BatchNormParams( + scale=has_scaling, fused=fused_batch_norm), + scope=scope) + # Add another layer + scope = 'network/expanded_conv_2/conv' + + _ = conv2d( + layer1, + 2 * out_depth, [5, 5], + stride=stride, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=activation_fn, + normalizer_fn=batch_norm, + normalizer_params=self._BatchNormParams( + scale=has_scaling, fused=fused_batch_norm), + scope=scope) + + fold_batch_norms.FoldBatchNorms( + g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay) + folded_mul = g.get_operation_by_name(scope + '/mul_fold') + self.assertEqual(folded_mul.type, 'Mul') + self._AssertInputOpsAre(folded_mul, [ + scope + '/correction_mult', + self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm) + ]) + self._AssertOutputGoesToOps(folded_mul, g, [scope + '/Conv2D_Fold']) + + folded_conv = g.get_operation_by_name(scope + '/Conv2D_Fold') + self.assertEqual(folded_conv.type, 'Conv2D') + # Remove :0 at end of name for tensor prior to comparison + self._AssertInputOpsAre(folded_conv, + [scope + '/mul_fold', layer1.name[:-2]]) + self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul']) + + folded_add = g.get_operation_by_name(scope + '/add_fold') + self.assertEqual(folded_add.type, 'Add') + self._AssertInputOpsAre(folded_add, [ + scope + '/correction_add', + self._BathNormBiasName(scope, fused_batch_norm) + ]) 
+ output_op_names = [scope + '/' + relu_op_name] + self._AssertOutputGoesToOps(folded_add, g, output_op_names) + + for op in g.get_operations(): + self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) + def _TestFoldConv2dUnknownShape(self, relu, relu_op_name, with_bypass, has_scaling, fused_batch_norm, freeze_batch_norm_delay): -- GitLab From 9d2972e6ceb4911458e867d75466e14a31fa1773 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 11:22:49 -0700 Subject: [PATCH 212/434] show breakdown of execution cost with compute and memory cost for op summarization PiperOrigin-RevId: 194117030 --- .../core/grappler/costs/virtual_scheduler.cc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 0e5c654acf..7f68272950 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -850,14 +850,16 @@ Costs VirtualScheduler::Summary() const { VLOG(1) << "Expected max per-op streaming buffers: " << graph_costs_.max_per_op_streaming; - VLOG(1) << "Per-op execution time:"; + VLOG(1) << "Per-op execution time / compute time / memory time:"; for (const auto& op_cost_pair : op_to_cost_) { const auto& op = op_cost_pair.first; const auto& cost = op_cost_pair.second.execution_time.count(); + const auto& compute_cost = op_cost_pair.second.compute_time.count(); + const auto& memory_cost = op_cost_pair.second.memory_time.count(); const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate; if (cost) { // Skip printing out zero-cost ops. VLOG(1) << " + " << op << " : " << (is_op_cost_accurate ? 
"" : "~") - << cost; + << cost << " / " << compute_cost << " / " << memory_cost; } } @@ -898,7 +900,8 @@ Costs VirtualScheduler::Summary() const { << ", at the end: " << strings::HumanReadableNumBytes(state.memory_usage); - VLOG(1) << "Per-op execution time (and memory usage at peak memory usage):"; + VLOG(1) << "Per-op execution time compute time / memory time " + "(and memory usage at peak memory usage):"; // Profile non-persistent op memory usage. for (const auto& node_port : state.mem_usage_snapshot_at_peak) { @@ -912,6 +915,8 @@ Costs VirtualScheduler::Summary() const { for (const auto& op_cost_pair : state.op_to_cost) { const auto& op = op_cost_pair.first; const auto& cost = op_cost_pair.second.execution_time.count(); + const auto& compute_cost = op_cost_pair.second.compute_time.count(); + const auto& memory_cost = op_cost_pair.second.memory_time.count(); total_compute_time_ns += op_cost_pair.second.execution_time; const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate; if (!is_op_cost_accurate) { @@ -930,8 +935,9 @@ Costs VirtualScheduler::Summary() const { if (cost || mem_usage_percent > 1.0) { // Print out only non-zero cost ops or ops with > 1% memory usage. VLOG(1) << " + " << op << " : " << (is_op_cost_accurate ? "" : "~") - << cost << " (" << strings::HumanReadableNumBytes(op_mem_usage) - << " [" << mem_usage_percent << "%] " + << cost << " / " << compute_cost << " / " << memory_cost << " (" + << strings::HumanReadableNumBytes(op_mem_usage) << " [" + << mem_usage_percent << "%] " << (persisent_ops.count(op) > 0 ? ": persistent op)" : ")"); } } -- GitLab From d9cca05cbc5a4a7aeade2634e59fbf779965e3a0 Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Tue, 24 Apr 2018 11:24:37 -0700 Subject: [PATCH 213/434] Fix typo in event field name. 
PiperOrigin-RevId: 194117352 --- tensorflow/contrib/lite/profiling/profile_buffer.h | 10 +++++----- .../contrib/lite/profiling/profile_buffer_test.cc | 4 ++-- tensorflow/contrib/lite/profiling/profiler_test.cc | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/lite/profiling/profile_buffer.h b/tensorflow/contrib/lite/profiling/profile_buffer.h index 3bfe02571b..b2f565376c 100644 --- a/tensorflow/contrib/lite/profiling/profile_buffer.h +++ b/tensorflow/contrib/lite/profiling/profile_buffer.h @@ -37,9 +37,9 @@ struct ProfileEvent { // Label of the event. This usually describes the event. const char* tag; // Timestamp in microseconds when the event began. - int64_t begin_timestamp_ms; + int64_t begin_timestamp_us; // Timestamp in microseconds when the event ended. - int64_t end_timestamp_ms; + int64_t end_timestamp_us; // The field containing the type of event. This must be one of the event types // in EventType. EventType event_type; @@ -79,8 +79,8 @@ class ProfileBuffer { event_buffer_[index].tag = tag; event_buffer_[index].event_type = event_type; event_buffer_[index].event_metadata = event_metadata; - event_buffer_[index].begin_timestamp_ms = timestamp; - event_buffer_[index].end_timestamp_ms = 0; + event_buffer_[index].begin_timestamp_us = timestamp; + event_buffer_[index].end_timestamp_us = 0; current_index_++; return index; } @@ -103,7 +103,7 @@ class ProfileBuffer { } int event_index = event_handle % max_size; - event_buffer_[event_index].end_timestamp_ms = NowMicros(); + event_buffer_[event_index].end_timestamp_us = NowMicros(); } // Returns the size of the buffer. 
diff --git a/tensorflow/contrib/lite/profiling/profile_buffer_test.cc b/tensorflow/contrib/lite/profiling/profile_buffer_test.cc index 0c5f0cd314..b8784cca45 100644 --- a/tensorflow/contrib/lite/profiling/profile_buffer_test.cc +++ b/tensorflow/contrib/lite/profiling/profile_buffer_test.cc @@ -49,13 +49,13 @@ TEST(ProfileBufferTest, AddEvent) { auto event = GetProfileEvents(buffer)[0]; EXPECT_EQ(event->tag, "hello"); - EXPECT_GT(event->begin_timestamp_ms, 0); + EXPECT_GT(event->begin_timestamp_us, 0); EXPECT_EQ(event->event_type, ProfileEvent::EventType::DEFAULT); EXPECT_EQ(event->event_metadata, 42); buffer.EndEvent(event_handle); EXPECT_EQ(1, buffer.Size()); - EXPECT_GE(event->end_timestamp_ms, event->begin_timestamp_ms); + EXPECT_GE(event->end_timestamp_us, event->begin_timestamp_us); } TEST(ProfileBufferTest, OverFlow) { diff --git a/tensorflow/contrib/lite/profiling/profiler_test.cc b/tensorflow/contrib/lite/profiling/profiler_test.cc index 994523a8fb..7914f36a31 100644 --- a/tensorflow/contrib/lite/profiling/profiler_test.cc +++ b/tensorflow/contrib/lite/profiling/profiler_test.cc @@ -30,7 +30,7 @@ namespace { void AssertDurationOfEventAroundMs(const ProfileEvent* event, double expected_ms, double eps_ms) { double duration_ms = - (event->end_timestamp_ms - event->begin_timestamp_ms) / 1e3; + (event->end_timestamp_us - event->begin_timestamp_us) / 1e3; EXPECT_NEAR(expected_ms, duration_ms, eps_ms); } -- GitLab From ff013946362e7d80c53b82b64a7f5b462808ff8f Mon Sep 17 00:00:00 2001 From: Malcolm Reynolds Date: Tue, 24 Apr 2018 11:26:26 -0700 Subject: [PATCH 214/434] Clarify error message when importing a GraphDef with unknown ops. This should make the situation from github.com/tensorflow/tensorflow/issues/17014 less confusing. 
PiperOrigin-RevId: 194117660 --- tensorflow/python/framework/importer.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index 3f8a8c4bef..5112bea48b 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -572,7 +572,14 @@ def import_graph_def(graph_def, if node.name in name_to_op: raise ValueError('Duplicate name \'%s\' in GraphDef.' % node.name) if node.op not in op_dict: - raise ValueError('No op named %s in defined operations.' % node.op) + raise ValueError( + 'No op named %s in defined operations. If the Graph you are ' + 'importing uses custom ops or any parts of tf.contrib, you ' + 'should explicitly import the libraries defining those ops ' + 'before loading the Graph. Note that tf.contrib is lazily loaded ' + 'when accessed, so simply referencing (e.g.) ' + '`tf.contrib.resampler` will cause those ops to be made ' + 'available.' % node.op) op_def = op_dict[node.op] output_types = _OutputTypes(node, op_dict) -- GitLab From de3e9830aae0904f0d40d37e9da5b113c4a9a0f0 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 24 Apr 2018 11:29:43 -0700 Subject: [PATCH 215/434] Small refactor of tf.keras aiming at centralizing reusable utilities in `utils`. 
PiperOrigin-RevId: 194118244 --- .../_impl/keras/applications/mobilenet.py | 1 - .../keras/_impl/keras/engine/base_layer.py | 133 ++---------------- .../keras/_impl/keras/engine/network.py | 39 ++--- .../keras/_impl/keras/engine/topology_test.py | 8 +- .../keras/layers/advanced_activations.py | 14 +- .../keras/_impl/keras/layers/convolutional.py | 4 +- .../keras/layers/convolutional_recurrent.py | 6 +- .../keras/_impl/keras/layers/embeddings.py | 6 +- .../python/keras/_impl/keras/layers/local.py | 10 +- .../python/keras/_impl/keras/layers/merge.py | 16 +-- .../python/keras/_impl/keras/layers/noise.py | 8 +- .../keras/_impl/keras/layers/recurrent.py | 26 ++-- .../keras/_impl/keras/layers/wrappers.py | 18 +-- .../keras/_impl/keras/utils/generic_utils.py | 30 ++++ .../keras/_impl/keras/utils/tf_utils.py | 80 +++++++++++ 15 files changed, 199 insertions(+), 200 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py index 12775fccec..7b7288793d 100644 --- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py +++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py @@ -79,7 +79,6 @@ from tensorflow.python.keras._impl.keras.applications import imagenet_utils from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions from tensorflow.python.keras._impl.keras.engine import InputSpec -from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs from tensorflow.python.keras._impl.keras.layers import Activation from tensorflow.python.keras._impl.keras.layers import BatchNormalization diff --git a/tensorflow/python/keras/_impl/keras/engine/base_layer.py b/tensorflow/python/keras/_impl/keras/engine/base_layer.py index 
abae6c3785..a3e78c95dc 100644 --- a/tensorflow/python/keras/_impl/keras/engine/base_layer.py +++ b/tensorflow/python/keras/_impl/keras/engine/base_layer.py @@ -20,7 +20,6 @@ from __future__ import print_function import collections import inspect # Necessary supplement to tf_inspect to deal with variadic args. -import re import numpy as np from six.moves import zip # pylint: disable=redefined-builtin @@ -35,6 +34,10 @@ from tensorflow.python.keras._impl.keras import constraints from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.utils import generic_utils +from tensorflow.python.keras._impl.keras.utils import tf_utils +# A module that only depends on `keras.layers` import these from here. +from tensorflow.python.keras._impl.keras.utils.generic_utils import to_snake_case # pylint: disable=unused-import +from tensorflow.python.keras._impl.keras.utils.tf_utils import is_tensor_or_tensor_list # pylint: disable=unused-import from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import variable_scope as vs @@ -177,7 +180,8 @@ class Layer(checkpointable.CheckpointableBase): def _init_set_name(self, name, zero_based=True): if not name: self._name = unique_layer_name( - to_snake_case(self.__class__.__name__), zero_based=zero_based) + generic_utils.to_snake_case(self.__class__.__name__), + zero_based=zero_based) else: self._name = name @@ -318,7 +322,7 @@ class Layer(checkpointable.CheckpointableBase): # Requesting input-conditional updates. inputs = nest.flatten(inputs) - reachable = get_reachable_from_inputs(inputs, self.updates) + reachable = tf_utils.get_reachable_from_inputs(inputs, self.updates) updates = [] for update in self.updates: if update in reachable: @@ -419,7 +423,7 @@ class Layer(checkpointable.CheckpointableBase): # The losses we want to return will be part of this set. 
# To avoid unnecessary work, we stop the search in case all of # `self.losses` have been retrieved. - reachable = get_reachable_from_inputs(inputs, self.losses) + reachable = tf_utils.get_reachable_from_inputs(inputs, self.losses) losses = [] for loss in self.losses: if loss in reachable: @@ -639,7 +643,7 @@ class Layer(checkpointable.CheckpointableBase): if not hasattr(self, '_call_fn_args'): self._call_fn_args = estimator_util.fn_args(self.call) if ('mask' in self._call_fn_args and 'mask' not in kwargs and - not is_all_none(previous_mask)): + not generic_utils.is_all_none(previous_mask)): # The previous layer generated a mask, and mask was not explicitly pass # to __call__, hence we set previous_mask as the default value. kwargs['mask'] = previous_mask @@ -1615,9 +1619,9 @@ class Node(object): # Following 2 properties: input and output shapes. # List of shape tuples, shapes of input_tensors. - self.input_shapes = [static_shape(x) for x in input_tensors] + self.input_shapes = [backend.int_shape(x) for x in input_tensors] # List of shape tuples, shapes of output_tensors. - self.output_shapes = [static_shape(x) for x in output_tensors] + self.output_shapes = [backend.int_shape(x) for x in output_tensors] # Optional keyword arguments to layer's `call`. self.arguments = arguments @@ -1678,91 +1682,6 @@ class DeferredTensor(object): self.dtype.name) -def shape_type_conversion(fn): - """Decorator that handles tuple/TensorShape conversion. - - Used in `compute_output_shape` and `build`. - - Arguments: - fn: function to wrap. - - Returns: - Wrapped function. 
- """ - - def wrapper(instance, input_shape): - if input_shape is not None: - if isinstance(input_shape, list): - input_shape = [ - tuple(tensor_shape.TensorShape(x).as_list()) for x in input_shape] - else: - input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list()) - output_shape = fn(instance, input_shape) - if output_shape is not None: - if isinstance(output_shape, list): - return [tensor_shape.TensorShape(x) for x in output_shape] - return tensor_shape.TensorShape(output_shape) - - return wrapper - - -def object_list_uid(object_list): - """Creates a single string from object ids.""" - object_list = nest.flatten(object_list) - return ', '.join([str(abs(id(x))) for x in object_list]) - - -def static_shape(x): - """Get the static shape of a Tensor, or None if it is unavailable.""" - if x is None: - return None - try: - return tuple(x.get_shape().as_list()) - except ValueError: - return None - - -def get_reachable_from_inputs(inputs, targets=None): - """Returns the set of tensors/ops reachable from `inputs`. - - Stops if all targets have been found (target is optional). - - Only valid in Symbolic mode, not Eager mode. - - Args: - inputs: List of tensors. - targets: List of tensors. - - Returns: - A set of tensors reachable from the inputs (includes the inputs themselves). 
- """ - reachable = set(inputs) - if targets: - targets = set(targets) - queue = inputs[:] - - while queue: - x = queue.pop() - if isinstance(x, ops.Operation): - outputs = x.outputs[:] or [] - outputs += x._control_outputs - elif isinstance(x, ops.Tensor): - outputs = x.consumers() - elif isinstance(x, tf_variables.Variable): - outputs = [x.op] - else: - raise TypeError('Expected Operation, Variable, or Tensor, got ' + str(x)) - - for y in outputs: - if y not in reachable: - reachable.add(y) - queue.insert(0, y) - - if targets and targets.issubset(reachable): - return reachable - return reachable - - def unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace='', zero_based=False): """Makes a layer name (or arbitrary string) unique within a TensorFlow graph. @@ -1809,28 +1728,6 @@ def unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace='', return proposed_name -def to_snake_case(name): - intermediate = re.sub('(.)([A-Z][a-z0-9]+)', r'\1_\2', name) - insecure = re.sub('([a-z])([A-Z])', r'\1_\2', intermediate).lower() - # If the class is private the name starts with "_" which is not secure - # for creating scopes. We prefix the name with "private" in this case. - if insecure[0] != '_': - return insecure - return 'private' + insecure - - -def is_all_none(iterable_or_element): - if not isinstance(iterable_or_element, (list, tuple)): - iterable = [iterable_or_element] - else: - iterable = iterable_or_element - # We cannot use Python's `any` because the iterable may return Tensors. 
- for element in iterable: - if element is not None: - return False - return True - - def have_all_keras_metadata(iterable_or_element): if not isinstance(iterable_or_element, (list, tuple)): iterable = [iterable_or_element] @@ -1861,14 +1758,6 @@ def collect_previous_mask(input_tensors): return masks -def is_tensor_or_tensor_list(v): - v = nest.flatten(v) - if v and isinstance(v[0], ops.Tensor): - return True - else: - return False - - def get_default_graph_uid_map(): # TODO(fchollet): refactor this into backend. graph = ops.get_default_graph() diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index 4127c781eb..9f8ee129aa 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -32,10 +32,11 @@ from tensorflow.python.eager import context from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape -from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import backend from tensorflow.python.keras._impl.keras.engine import base_layer from tensorflow.python.keras._impl.keras.engine import saving from tensorflow.python.keras._impl.keras.utils import generic_utils +from tensorflow.python.keras._impl.keras.utils import tf_utils from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary as print_layer_summary from tensorflow.python.platform import tf_logging as logging @@ -252,8 +253,8 @@ class Network(base_layer.Layer): for x in self.inputs: mask = x._keras_mask if hasattr(x, '_keras_mask') else None # pylint: disable=protected-access masks.append(mask) - mask_cache_key = (base_layer.object_list_uid(self.inputs) + '_' + - base_layer.object_list_uid(masks)) + mask_cache_key = 
(generic_utils.object_list_uid(self.inputs) + '_' + + generic_utils.object_list_uid(masks)) masks = [] for x in self.outputs: mask = x._keras_mask if hasattr(x, '_keras_mask') else None # pylint: disable=protected-access @@ -274,7 +275,7 @@ class Network(base_layer.Layer): self.input_names.append(layer.name) if layer.is_placeholder: self._feed_input_names.append(layer.name) - self._feed_input_shapes.append(K.int_shape(self.inputs[i])) + self._feed_input_shapes.append(backend.int_shape(self.inputs[i])) # layer.input gives an error in eager mode if not context.executing_eagerly(): self._feed_inputs.append(layer.input) @@ -373,7 +374,7 @@ class Network(base_layer.Layer): weights = [] for layer in self.layers: weights += layer.weights - return K.batch_get_value(weights) + return backend.batch_get_value(weights) def set_weights(self, weights): """Sets the weights of the model. @@ -389,7 +390,7 @@ class Network(base_layer.Layer): for sw, w in zip(layer.weights, layer_weights): tuples.append((sw, w)) weights = weights[num_param:] - K.batch_set_value(tuples) + backend.batch_set_value(tuples) def compute_mask(self, inputs, mask): if not self._is_graph_network: @@ -400,8 +401,8 @@ class Network(base_layer.Layer): masks = [None for _ in range(len(inputs))] else: masks = generic_utils.to_list(mask) - cache_key = (base_layer.object_list_uid(inputs) - + '_' + base_layer.object_list_uid(masks)) + cache_key = (generic_utils.object_list_uid(inputs) + + '_' + generic_utils.object_list_uid(masks)) if cache_key in self._output_mask_cache: return self._output_mask_cache[cache_key] else: @@ -515,7 +516,7 @@ class Network(base_layer.Layer): relevant_inputs += inputs else: relevant_inputs.append(inputs) - reachable = base_layer.get_reachable_from_inputs(relevant_inputs, updates) + reachable = tf_utils.get_reachable_from_inputs(relevant_inputs, updates) relevant_conditional_updates = [x for x in updates if x in reachable] unconditional_updates = [ x for x in updates if 
x._unconditional_update] # pylint: disable=protected-access @@ -552,7 +553,7 @@ class Network(base_layer.Layer): relevant_inputs += inputs else: relevant_inputs.append(inputs) - reachable = base_layer.get_reachable_from_inputs(relevant_inputs, losses) + reachable = tf_utils.get_reachable_from_inputs(relevant_inputs, losses) relevant_conditional_losses = [x for x in losses if x in reachable] unconditional_losses = [ x for x in losses if x._unconditional_loss] # pylint: disable=protected-access @@ -634,8 +635,8 @@ class Network(base_layer.Layer): if not context.executing_eagerly(): # Try to retrieve cached outputs if the layer has already been called # on these exact inputs. - cache_key = (base_layer.object_list_uid(inputs) - + '_' + base_layer.object_list_uid(masks)) + cache_key = (generic_utils.object_list_uid(inputs) + + '_' + generic_utils.object_list_uid(masks)) if cache_key in self._output_tensor_cache: # Cache hit. return self._output_tensor_cache[cache_key] @@ -667,7 +668,7 @@ class Network(base_layer.Layer): ': model has ' + str(len(self._input_layers)) + ' tensor inputs.') - cache_key = base_layer.object_list_uid(input_shapes) + cache_key = generic_utils.object_list_uid(input_shapes) if cache_key not in self._output_shape_cache: # Cache miss. We have to run the network graph manually (recursive calls # to `compute_output_shape`). @@ -856,7 +857,7 @@ class Network(base_layer.Layer): for x in self.outputs: assert str(id(x)) in tensor_map, 'Could not compute output ' + str(x) tensor, mask = tensor_map[str(id(x))] - output_shapes.append(base_layer.static_shape(x)) + output_shapes.append(backend.int_shape(x)) output_tensors.append(tensor) output_masks.append(mask) @@ -870,14 +871,14 @@ class Network(base_layer.Layer): if not context.executing_eagerly(): # Update cache; # keys are based on ids on input tensors and inputs masks. 
- cache_key = (base_layer.object_list_uid(inputs) - + '_' + base_layer.object_list_uid(masks)) + cache_key = (generic_utils.object_list_uid(inputs) + + '_' + generic_utils.object_list_uid(masks)) self._output_tensor_cache[cache_key] = output_tensors self._output_mask_cache[cache_key] = output_masks if output_shapes is not None: - input_shapes = [base_layer.static_shape(x) for x in inputs] - cache_key = base_layer.object_list_uid(input_shapes) + input_shapes = [backend.int_shape(x) for x in inputs] + cache_key = generic_utils.object_list_uid(input_shapes) self._output_shape_cache[cache_key] = output_shapes return output_tensors, output_masks @@ -1338,7 +1339,7 @@ class Network(base_layer.Layer): 'class_name': self.__class__.__name__, 'config': config, 'keras_version': keras_version, - 'backend': K.backend() + 'backend': backend.backend() } return model_config diff --git a/tensorflow/python/keras/_impl/keras/engine/topology_test.py b/tensorflow/python/keras/_impl/keras/engine/topology_test.py index 49cc1cd3b3..6993a04289 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology_test.py @@ -964,16 +964,16 @@ class GraphUtilsTest(test.TestCase): x_5 = x_3 * pl_1 self.assertEqual( - keras.engine.base_layer.get_reachable_from_inputs([pl_1]), + keras.utils.tf_utils.get_reachable_from_inputs([pl_1]), {pl_1, x_1, x_4, x_5, x_1.op, x_4.op, x_5.op}) self.assertEqual( - keras.engine.base_layer.get_reachable_from_inputs([pl_1, pl_2]), + keras.utils.tf_utils.get_reachable_from_inputs([pl_1, pl_2]), {pl_1, pl_2, x_1, x_2, x_4, x_5, x_1.op, x_2.op, x_4.op, x_5.op}) self.assertEqual( - keras.engine.base_layer.get_reachable_from_inputs([pl_3]), + keras.utils.tf_utils.get_reachable_from_inputs([pl_3]), {pl_3, x_3, x_5, x_3.op, x_5.op}) self.assertEqual( - keras.engine.base_layer.get_reachable_from_inputs([x_3]), + keras.utils.tf_utils.get_reachable_from_inputs([x_3]), {x_3, x_5, x_5.op}) diff --git 
a/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py b/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py index 11ca89d625..89931db3c0 100644 --- a/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py +++ b/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py @@ -25,7 +25,7 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion +from tensorflow.python.keras._impl.keras.utils import tf_utils from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -64,7 +64,7 @@ class LeakyReLU(Layer): base_config = super(LeakyReLU, self).get_config() return dict(list(base_config.items()) + list(config.items())) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): return input_shape @@ -119,7 +119,7 @@ class PReLU(Layer): else: self.shared_axes = list(shared_axes) - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): param_shape = list(input_shape[1:]) self.param_broadcast = [False] * len(param_shape) @@ -162,7 +162,7 @@ class PReLU(Layer): base_config = super(PReLU, self).get_config() return dict(list(base_config.items()) + list(config.items())) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): return input_shape @@ -201,7 +201,7 @@ class ELU(Layer): base_config = super(ELU, self).get_config() return dict(list(base_config.items()) + list(config.items())) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): return input_shape @@ -241,7 +241,7 @@ class ThresholdedReLU(Layer): base_config = super(ThresholdedReLU, 
self).get_config() return dict(list(base_config.items()) + list(config.items())) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): return input_shape @@ -275,6 +275,6 @@ class Softmax(Layer): base_config = super(Softmax, self).get_config() return dict(list(base_config.items()) + list(config.items())) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): return input_shape diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional.py b/tensorflow/python/keras/_impl/keras/layers/convolutional.py index 12b965587f..9971f12773 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional.py @@ -28,7 +28,6 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion # imports for backwards namespace compatibility # pylint: disable=unused-import from tensorflow.python.keras._impl.keras.layers.pooling import AveragePooling1D @@ -39,6 +38,7 @@ from tensorflow.python.keras._impl.keras.layers.pooling import MaxPooling2D from tensorflow.python.keras._impl.keras.layers.pooling import MaxPooling3D # pylint: enable=unused-import from tensorflow.python.keras._impl.keras.utils import conv_utils +from tensorflow.python.keras._impl.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops @@ -1731,7 +1731,7 @@ class DepthwiseConv2D(Conv2D): return outputs - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if self.data_format == 'channels_first': rows = input_shape[2] diff --git 
a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py index 6b2a1d98fe..be25bbc043 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py @@ -28,11 +28,11 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.keras._impl.keras.layers.recurrent import _generate_dropout_mask from tensorflow.python.keras._impl.keras.layers.recurrent import RNN from tensorflow.python.keras._impl.keras.utils import conv_utils from tensorflow.python.keras._impl.keras.utils import generic_utils +from tensorflow.python.keras._impl.keras.utils import tf_utils from tensorflow.python.util.tf_export import tf_export @@ -168,7 +168,7 @@ class ConvRNN2D(RNN): self.input_spec = [InputSpec(ndim=5)] self.states = None - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if isinstance(input_shape, list): input_shape = input_shape[0] @@ -209,7 +209,7 @@ class ConvRNN2D(RNN): for _ in range(2)] return output_shape - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): # Note input_shape will be list of shapes of initial states and # constants if these are passed in __call__. 
diff --git a/tensorflow/python/keras/_impl/keras/layers/embeddings.py b/tensorflow/python/keras/_impl/keras/layers/embeddings.py index 07b8726b85..2b353ac007 100644 --- a/tensorflow/python/keras/_impl/keras/layers/embeddings.py +++ b/tensorflow/python/keras/_impl/keras/layers/embeddings.py @@ -23,7 +23,7 @@ from tensorflow.python.keras._impl.keras import constraints from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion +from tensorflow.python.keras._impl.keras.utils import tf_utils from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -114,7 +114,7 @@ class Embedding(Layer): self.mask_zero = mask_zero self.input_length = input_length - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): self.embeddings = self.add_weight( shape=(self.input_dim, self.output_dim), @@ -130,7 +130,7 @@ class Embedding(Layer): else: return math_ops.not_equal(inputs, 0) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if self.input_length is None: return input_shape + (self.output_dim,) diff --git a/tensorflow/python/keras/_impl/keras/layers/local.py b/tensorflow/python/keras/_impl/keras/layers/local.py index 13d96e9392..caae820fb3 100644 --- a/tensorflow/python/keras/_impl/keras/layers/local.py +++ b/tensorflow/python/keras/_impl/keras/layers/local.py @@ -25,8 +25,8 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.base_layer import 
shape_type_conversion from tensorflow.python.keras._impl.keras.utils import conv_utils +from tensorflow.python.keras._impl.keras.utils import tf_utils from tensorflow.python.util.tf_export import tf_export @@ -120,7 +120,7 @@ class LocallyConnected1D(Layer): self.bias_constraint = constraints.get(bias_constraint) self.input_spec = InputSpec(ndim=3) - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): input_dim = input_shape[2] if input_dim is None: @@ -148,7 +148,7 @@ class LocallyConnected1D(Layer): self.input_spec = InputSpec(ndim=3, axes={2: input_dim}) self.built = True - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): length = conv_utils.conv_output_length(input_shape[1], self.kernel_size[0], self.padding, self.strides[0]) @@ -307,7 +307,7 @@ class LocallyConnected2D(Layer): self.bias_constraint = constraints.get(bias_constraint) self.input_spec = InputSpec(ndim=4) - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): if self.data_format == 'channels_last': input_row, input_col = input_shape[1:-1] @@ -350,7 +350,7 @@ class LocallyConnected2D(Layer): self.input_spec = InputSpec(ndim=4, axes={-1: input_filter}) self.built = True - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if self.data_format == 'channels_first': rows = input_shape[2] diff --git a/tensorflow/python/keras/_impl/keras/layers/merge.py b/tensorflow/python/keras/_impl/keras/layers/merge.py index 7c87e6c067..2b6cf7c8a9 100644 --- a/tensorflow/python/keras/_impl/keras/layers/merge.py +++ b/tensorflow/python/keras/_impl/keras/layers/merge.py @@ -22,7 +22,7 @@ from __future__ import print_function from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.engine.base_layer import Layer -from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion +from 
tensorflow.python.keras._impl.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn @@ -83,7 +83,7 @@ class _Merge(Layer): output_shape.append(i) return tuple(output_shape) - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): # Used purely for shape validation. if not isinstance(input_shape, list): @@ -181,7 +181,7 @@ class _Merge(Layer): else: return self._merge_function(inputs) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if input_shape[0] is None: output_shape = None @@ -274,7 +274,7 @@ class Subtract(_Merge): ``` """ - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): super(Subtract, self).build(input_shape) if len(input_shape) != 2: @@ -370,7 +370,7 @@ class Concatenate(_Merge): self.supports_masking = True self._reshape_required = False - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): # Used purely for shape validation. if not isinstance(input_shape, list) or len(input_shape) < 2: @@ -392,7 +392,7 @@ class Concatenate(_Merge): def _merge_function(self, inputs): return K.concatenate(inputs, axis=self.axis) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if not isinstance(input_shape, list): raise ValueError('A `Concatenate` layer should be called ' @@ -478,7 +478,7 @@ class Dot(_Merge): self.supports_masking = True self._reshape_required = False - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): # Used purely for shape validation. 
if not isinstance(input_shape, list) or len(input_shape) != 2: @@ -523,7 +523,7 @@ class Dot(_Merge): output = K.batch_dot(x1, x2, axes) return output - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if not isinstance(input_shape, list) or len(input_shape) != 2: raise ValueError('A `Dot` layer should be called ' diff --git a/tensorflow/python/keras/_impl/keras/layers/noise.py b/tensorflow/python/keras/_impl/keras/layers/noise.py index 72dc7a1ff8..addac5b137 100644 --- a/tensorflow/python/keras/_impl/keras/layers/noise.py +++ b/tensorflow/python/keras/_impl/keras/layers/noise.py @@ -22,7 +22,7 @@ import numpy as np from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion +from tensorflow.python.keras._impl.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -69,7 +69,7 @@ class GaussianNoise(Layer): base_config = super(GaussianNoise, self).get_config() return dict(list(base_config.items()) + list(config.items())) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): return input_shape @@ -116,7 +116,7 @@ class GaussianDropout(Layer): base_config = super(GaussianDropout, self).get_config() return dict(list(base_config.items()) + list(config.items())) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): return input_shape @@ -188,6 +188,6 @@ class AlphaDropout(Layer): base_config = super(AlphaDropout, self).get_config() return dict(list(base_config.items()) + list(config.items())) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): return input_shape diff --git 
a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index f53db987ff..f6d6e1391c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -31,8 +31,8 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion -from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg +from tensorflow.python.keras._impl.keras.utils import generic_utils +from tensorflow.python.keras._impl.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops @@ -107,7 +107,7 @@ class StackedRNNCells(Layer): # Call the cells in order and store the returned states. 
new_nested_states = [] for cell, states in zip(self.cells, nested_states): - if has_arg(cell.call, 'constants'): + if generic_utils.has_arg(cell.call, 'constants'): inputs, states = cell.call(inputs, states, constants=constants, **kwargs) else: @@ -122,14 +122,14 @@ class StackedRNNCells(Layer): states += cell_states return inputs, states - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): if isinstance(input_shape, list): constants_shape = input_shape[1:] input_shape = input_shape[0] for cell in self.cells: if isinstance(cell, Layer): - if has_arg(cell.call, 'constants'): + if generic_utils.has_arg(cell.call, 'constants'): cell.build([input_shape] + constants_shape) else: cell.build(input_shape) @@ -429,7 +429,7 @@ class RNN(Layer): def states(self, states): self._states = states - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if isinstance(input_shape, list): input_shape = input_shape[0] @@ -461,7 +461,7 @@ class RNN(Layer): else: return output_mask - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): # Note input_shape will be list of shapes of initial states and # constants if these are passed in __call__. 
@@ -609,11 +609,11 @@ class RNN(Layer): 'or `batch_shape` argument to your Input layer.') kwargs = {} - if has_arg(self.cell.call, 'training'): + if generic_utils.has_arg(self.cell.call, 'training'): kwargs['training'] = training if constants: - if not has_arg(self.cell.call, 'constants'): + if not generic_utils.has_arg(self.cell.call, 'constants'): raise ValueError('RNN cell does not support constants') def step(inputs, states): @@ -884,7 +884,7 @@ class SimpleRNNCell(Layer): self._dropout_mask = None self._recurrent_dropout_mask = None - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): self.kernel = self.add_weight( shape=(input_shape[-1], self.units), @@ -1287,7 +1287,7 @@ class GRUCell(Layer): self._dropout_mask = None self._recurrent_dropout_mask = None - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): input_dim = input_shape[-1] self.kernel = self.add_weight( @@ -1824,7 +1824,7 @@ class LSTMCell(Layer): self._dropout_mask = None self._recurrent_dropout_mask = None - @shape_type_conversion + @tf_utils.shape_type_conversion def build(self, input_shape): input_dim = input_shape[-1] self.kernel = self.add_weight( @@ -2388,7 +2388,7 @@ class Recurrent(Layer): self.dropout = 0 self.recurrent_dropout = 0 - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if isinstance(input_shape, list): input_shape = input_shape[0] diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers.py b/tensorflow/python/keras/_impl/keras/layers/wrappers.py index 9aee5f03b6..34a8eeeb5b 100644 --- a/tensorflow/python/keras/_impl/keras/layers/wrappers.py +++ b/tensorflow/python/keras/_impl/keras/layers/wrappers.py @@ -23,11 +23,10 @@ import copy from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import backend as K -from tensorflow.python.keras._impl.keras.engine import base_layer from 
tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion -from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg +from tensorflow.python.keras._impl.keras.utils import generic_utils +from tensorflow.python.keras._impl.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.util.tf_export import tf_export @@ -183,7 +182,7 @@ class TimeDistributed(Wrapper): def call(self, inputs, training=None, mask=None): kwargs = {} - if has_arg(self.layer.call, 'training'): + if generic_utils.has_arg(self.layer.call, 'training'): kwargs['training'] = training uses_learning_phase = False # pylint: disable=redefined-outer-name @@ -213,7 +212,7 @@ class TimeDistributed(Wrapper): input_length = array_ops.shape(inputs)[1] # Shape: (num_samples * timesteps, ...). And track the # transformation in self._input_map. - input_uid = base_layer.object_list_uid(inputs) + input_uid = generic_utils.object_list_uid(inputs) inputs = array_ops.reshape(inputs, (-1,) + input_shape[2:]) self._input_map[input_uid] = inputs # (num_samples * timesteps, ...) 
@@ -305,7 +304,7 @@ class Bidirectional(Wrapper): self.forward_layer.set_weights(weights[:nw // 2]) self.backward_layer.set_weights(weights[nw // 2:]) - @shape_type_conversion + @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): output_shape = tuple(self.forward_layer.compute_output_shape( input_shape).as_list()) @@ -383,12 +382,13 @@ class Bidirectional(Wrapper): def call(self, inputs, training=None, mask=None, initial_state=None): kwargs = {} - if has_arg(self.layer.call, 'training'): + if generic_utils.has_arg(self.layer.call, 'training'): kwargs['training'] = training - if has_arg(self.layer.call, 'mask'): + if generic_utils.has_arg(self.layer.call, 'mask'): kwargs['mask'] = mask - if initial_state is not None and has_arg(self.layer.call, 'initial_state'): + if initial_state is not None and generic_utils.has_arg( + self.layer.call, 'initial_state'): forward_state = initial_state[:len(initial_state) // 2] backward_state = initial_state[len(initial_state) // 2:] y = self.forward_layer.call(inputs, initial_state=forward_state, **kwargs) diff --git a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py index 3bbe87f92d..db184d278c 100644 --- a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py @@ -21,6 +21,7 @@ import binascii import codecs import marshal import os +import re import sys import time import types as python_types @@ -28,6 +29,7 @@ import types as python_types import numpy as np import six +from tensorflow.python.util import nest from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import tf_export @@ -526,3 +528,31 @@ def to_list(x): if isinstance(x, list): return x return [x] + + +def object_list_uid(object_list): + """Creates a single string from object ids.""" + object_list = nest.flatten(object_list) + 
return ', '.join([str(abs(id(x))) for x in object_list]) + + +def to_snake_case(name): + intermediate = re.sub('(.)([A-Z][a-z0-9]+)', r'\1_\2', name) + insecure = re.sub('([a-z])([A-Z])', r'\1_\2', intermediate).lower() + # If the class is private the name starts with "_" which is not secure + # for creating scopes. We prefix the name with "private" in this case. + if insecure[0] != '_': + return insecure + return 'private' + insecure + + +def is_all_none(iterable_or_element): + if not isinstance(iterable_or_element, (list, tuple)): + iterable = [iterable_or_element] + else: + iterable = iterable_or_element + # We cannot use Python's `any` because the iterable may return Tensors. + for element in iterable: + if element is not None: + return False + return True diff --git a/tensorflow/python/keras/_impl/keras/utils/tf_utils.py b/tensorflow/python/keras/_impl/keras/utils/tf_utils.py index 8da5f77777..162e5b2cd6 100644 --- a/tensorflow/python/keras/_impl/keras/utils/tf_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/tf_utils.py @@ -17,9 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import ops from tensorflow.python.framework import smart_cond as smart_module +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import variables +from tensorflow.python.util import nest def smart_cond(pred, true_fn=None, false_fn=None, name=None): @@ -72,3 +75,80 @@ def constant_value(pred): if isinstance(pred, variables.Variable): return None return smart_module.smart_constant_value(pred) + + +def is_tensor_or_tensor_list(v): + v = nest.flatten(v) + if v and isinstance(v[0], ops.Tensor): + return True + else: + return False + + +def get_reachable_from_inputs(inputs, targets=None): + """Returns the set of tensors/ops reachable from `inputs`. 
+ + Stops if all targets have been found (target is optional). + + Only valid in Symbolic mode, not Eager mode. + + Args: + inputs: List of tensors. + targets: List of tensors. + + Returns: + A set of tensors reachable from the inputs (includes the inputs themselves). + """ + reachable = set(inputs) + if targets: + targets = set(targets) + queue = inputs[:] + + while queue: + x = queue.pop() + if isinstance(x, ops.Operation): + outputs = x.outputs[:] or [] + outputs += x._control_outputs # pylint: disable=protected-access + elif isinstance(x, ops.Tensor): + outputs = x.consumers() + elif isinstance(x, variables.Variable): + outputs = [x.op] + else: + raise TypeError('Expected Operation, Variable, or Tensor, got ' + str(x)) + + for y in outputs: + if y not in reachable: + reachable.add(y) + queue.insert(0, y) + + if targets and targets.issubset(reachable): + return reachable + return reachable + + +def shape_type_conversion(fn): + """Decorator that handles tuple/TensorShape conversion. + + Used in `compute_output_shape` and `build`. + + Arguments: + fn: function to wrap. + + Returns: + Wrapped function. + """ + + def wrapper(instance, input_shape): + if input_shape is not None: + if isinstance(input_shape, list): + input_shape = [ + tuple(tensor_shape.TensorShape(x).as_list()) for x in input_shape] + else: + input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list()) + output_shape = fn(instance, input_shape) + if output_shape is not None: + if isinstance(output_shape, list): + return [tensor_shape.TensorShape(x) for x in output_shape] + return tensor_shape.TensorShape(output_shape) + + return wrapper -- GitLab From c2b1eebe7e256dda88beb91c7fa7662e01d12f9b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 11:43:35 -0700 Subject: [PATCH 216/434] Updating tests in constant_folding_test.cc so that the tests evaluate the original and optimized graphs and check that the output is the same. 
PiperOrigin-RevId: 194120424 --- .../optimizers/constant_folding_test.cc | 80 ++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 1acce05909..32dca29e12 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -520,6 +520,25 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) { EXPECT_EQ("Mul", node.op()) << node.name(); } } + + const std::vector fetch = {"mul_0", "mul_4", "mul_8"}; + auto x_known_t = GenerateRandomTensor(TensorShape({2, 2})); + auto x_partially_unknown_t = + GenerateRandomTensor(TensorShape({3, 4})); + auto x_unknown_t = GenerateRandomTensor(TensorShape({5, 7})); + auto expected_tensors = + EvaluateNodes(item.graph, fetch, + {{"x_known", x_known_t}, + {"x_partially_unknown", x_partially_unknown_t}, + {"x_unknown", x_unknown_t}}); + EXPECT_EQ(fetch.size(), expected_tensors.size()); + auto tensors = EvaluateNodes(output, fetch, + {{"x_known", x_known_t}, + {"x_partially_unknown", x_partially_unknown_t}, + {"x_unknown", x_unknown_t}}); + EXPECT_EQ(fetch.size(), tensors.size()); + for (int i = 0; i < tensors.size(); i++) + test::ExpectTensorNear(expected_tensors[i], tensors[i], 1e-5); } TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) { @@ -572,6 +591,20 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) { EXPECT_TRUE(IsControlInput(node.input(1))); } } + const std::vector fetch = {"addn1"}; + auto x_partially_unknown_t = + GenerateRandomTensor(TensorShape({2, 2})); + auto x_unknown_t = GenerateRandomTensor(TensorShape({2, 2})); + auto expected_tensors = + EvaluateNodes(item.graph, fetch, + {{"x_partially_unknown", x_partially_unknown_t}, + {"x_unknown", x_unknown_t}}); + EXPECT_EQ(1, expected_tensors.size()); + auto tensors = 
EvaluateNodes(output, fetch, + {{"x_partially_unknown", x_partially_unknown_t}, + {"x_unknown", x_unknown_t}}); + EXPECT_EQ(1, tensors.size()); + test::ExpectTensorNear(expected_tensors[0], tensors[0], 1e-5); } TEST_F(ConstantFoldingTest, CreateConstNodes) { @@ -1064,6 +1097,20 @@ TEST_F(ConstantFoldingTest, ShapeMaterializationShapeN) { } } EXPECT_EQ(9, found); + + auto v1_t = GenerateRandomTensor(TensorShape({3, 4})); + auto v2_t = GenerateRandomTensor(TensorShape({5, 6})); + auto v3_t = GenerateRandomTensor(TensorShape({4, 6})); + const std::vector fetch_nodes = {"i1a", "i1b", "i2a", "i2b", + "i2c", "i3a", "i3b"}; + auto tensors_expected = EvaluateNodes( + item.graph, fetch_nodes, {{"v1", v1_t}, {"v2", v2_t}, {"v3", v3_t}}); + EXPECT_EQ(fetch_nodes.size(), tensors_expected.size()); + auto tensors = EvaluateNodes(output, fetch_nodes, + {{"v1", v1_t}, {"v2", v2_t}, {"v3", v3_t}}); + EXPECT_EQ(fetch_nodes.size(), tensors.size()); + for (int i = 0; i < fetch_nodes.size(); i++) + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); } TEST_F(ConstantFoldingTest, ShapeMaterializationShapeN_MultipleOutputs) { @@ -1930,6 +1977,14 @@ TEST_F(ConstantFoldingTest, Packing) { Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + const std::vector fetch_nodes = {"i1", "i2"}; + auto tensors_expected = EvaluateNodes(item.graph, fetch_nodes); + EXPECT_EQ(fetch_nodes.size(), tensors_expected.size()); + auto tensors = EvaluateNodes(output, fetch_nodes); + EXPECT_EQ(fetch_nodes.size(), tensors.size()); + for (int i = 0; i < fetch_nodes.size(); i++) + test::ExpectTensorNear(tensors_expected[i], tensors[i], 1e-5); + // Make sure that the representation of the folded constant is space // efficient: in particular, the whole message should be smaller than 8k // (the size needed to naively encode 1000 floats folded twice). 
@@ -1965,6 +2020,13 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) { Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + std::vector fetch_nodes = {"o1", "o2", "p1", "p2"}; + auto a_t = GenerateRandomTensor(TensorShape({1, 5})); + auto g_t = GenerateRandomTensor(TensorShape({1})); + auto tensors_expected = + EvaluateNodes(item.graph, fetch_nodes, {{"a", a_t}, {"g", g_t}}); + EXPECT_EQ(fetch_nodes.size(), tensors_expected.size()); + // Run a second time to make sure the optimization is idempotent. item.graph.Swap(&output); status = optimizer.Optimize(nullptr, item, &output); @@ -2005,6 +2067,11 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) { } } EXPECT_EQ(6, found); + + auto tensors = EvaluateNodes(output, fetch_nodes, {{"a", a_t}, {"g", g_t}}); + EXPECT_EQ(fetch_nodes.size(), tensors.size()); + for (int i = 0; i < fetch_nodes.size(); i++) + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); } TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs_InfiniteLoop) { @@ -2024,6 +2091,11 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs_InfiniteLoop) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); + std::vector fetch_nodes = {"o1", "o2"}; + auto a_t = GenerateRandomTensor(TensorShape({2, 2})); + auto tensors_expected = EvaluateNodes(item.graph, fetch_nodes, {{"a", a_t}}); + EXPECT_EQ(fetch_nodes.size(), tensors_expected.size()); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); @@ -2078,6 +2150,10 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs_InfiniteLoop) { } } EXPECT_EQ(7, found); + auto tensors = EvaluateNodes(output, fetch_nodes, {{"a", a_t}}); + EXPECT_EQ(fetch_nodes.size(), tensors.size()); + for (int i = 0; i < fetch_nodes.size(); i++) + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); } TEST_F(ConstantFoldingTest, MaterializeReductionIndices) { @@ 
-2539,6 +2615,8 @@ TEST_F(ConstantFoldingTest, TrivialPack) { EXPECT_EQ(tensors_expected[0].shape(), tensors[0].shape()); } +// The test does not evaluate the optimized and original graphs to check if their +// outputs are the same. See b/78233179. TEST_F(ConstantFoldingTest, Enter) { GrapplerItem item; AttrValue frame_name; @@ -2555,7 +2633,7 @@ TEST_F(ConstantFoldingTest, Enter) { value_tensor.AsProtoTensorContent(value.mutable_tensor()); GraphDef& graph = item.graph; - AddNode("x", "Placeholder", {}, {{"T", type}}, &graph); + AddNode("x", "Placeholder", {}, {{"dtype", type}}, &graph); AddNode("c1", "Const", {"^x"}, {{"value", value}, {"dtype", type}}, &graph); AddNode("enter1", "Enter", {"x"}, {{"T", type}, -- GitLab From 9992042548ff268ac97ac3ebf1c584d380b0c106 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 11:46:17 -0700 Subject: [PATCH 217/434] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 194120868 --- tensorflow/go/op/wrappers.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index d038846c4f..4d91f2b68e 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -9602,6 +9602,14 @@ func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { } } +// ResourceApplyAdagradUpdateSlots sets the optional update_slots attribute to value. +// If not specified, defaults to true +func ResourceApplyAdagradUpdateSlots(value bool) ResourceApplyAdagradAttr { + return func(m optionalAttr) { + m["update_slots"] = value + } +} + // Update '*var' according to the adagrad scheme. // // accum += grad * grad @@ -10676,6 +10684,14 @@ func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagrad } } +// ResourceSparseApplyAdagradUpdateSlots sets the optional update_slots attribute to value.
+// If not specified, defaults to true +func ResourceSparseApplyAdagradUpdateSlots(value bool) ResourceSparseApplyAdagradAttr { + return func(m optionalAttr) { + m["update_slots"] = value + } +} + // Update relevant entries in '*var' and '*accum' according to the adagrad scheme. // // That is for rows we have grad for, we update var and accum as follows: -- GitLab From e6e43da77e9be2e7e455d94e9724983a263f310a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 11:49:35 -0700 Subject: [PATCH 218/434] Clarify error encountered when serializing critical_section_executions is a warning. PiperOrigin-RevId: 194121508 --- tensorflow/python/framework/meta_graph.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py index 391b17720c..923e76fc9c 100644 --- a/tensorflow/python/framework/meta_graph.py +++ b/tensorflow/python/framework/meta_graph.py @@ -439,9 +439,10 @@ def add_collection_def(meta_graph_def, key, graph=None, else: getattr(col_def, kind).value.extend([x for x in collection_list]) except Exception as e: # pylint: disable=broad-except - logging.warning("Error encountered when serializing %s.\n" + logging.warning("Issue encountered when serializing %s.\n" "Type is unsupported, or the types of the items don't " - "match field type in CollectionDef.\n%s", key, str(e)) + "match field type in CollectionDef. Note this is a warning " + "and probably safe to ignore.\n%s", key, str(e)) if key in meta_graph_def.collection_def: del meta_graph_def.collection_def[key] return -- GitLab From 7afe5df6b12309e20b471ce52a2549e6d6ea1745 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 12:45:45 -0700 Subject: [PATCH 219/434] Extract OptimizeGraph function in meta-optimizer. 
PiperOrigin-RevId: 194129729 --- .../optimizers/constant_folding_test.cc | 1 - .../grappler/optimizers/meta_optimizer.cc | 261 +++++++++--------- .../core/grappler/optimizers/meta_optimizer.h | 32 ++- 3 files changed, 167 insertions(+), 127 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 32dca29e12..25693c5c60 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -2528,7 +2528,6 @@ TEST_F(ConstantFoldingTest, PartialFolding_IdentityN) { ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); - LOG(INFO) << output.DebugString(); TF_EXPECT_OK(status); EXPECT_EQ(8, output.node_size()); for (const auto& node : output.node()) { diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 335fb403f1..c98eef1a6a 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -36,6 +36,9 @@ namespace tensorflow { namespace grappler { namespace { + +constexpr int kDefaultNumberOfIterations = 1; + int64 NumEdges(const GraphDef& graph) { int64 num_edges = 0; for (const auto& node : graph.node()) { @@ -50,144 +53,144 @@ string PrintSizesBeforeAfter(const GraphDef& before, const GraphDef& after) { NumEdges(after), " edges (", NumEdges(after) - NumEdges(before), ")"); } + +int NumIterations(const RewriterConfig& cfg) { + return cfg.meta_optimizer_iterations() == RewriterConfig::DEFAULT_NUM_ITERS + ? kDefaultNumberOfIterations + : cfg.meta_optimizer_iterations(); +} + +// Check if optimizer is allowed to run only once. 
+bool IsRunOnceOptimizer(const string& name) { return name == "layout"; } + } // namespace -std::unique_ptr MetaOptimizer::NewOptimizer( - const string& optimizer) { - std::unique_ptr graph_optimizer; - if (optimizer == "pruning") { - graph_optimizer.reset(new ModelPruner()); - } - if (optimizer == "function") { - graph_optimizer.reset(new FunctionOptimizer(cfg_.function_optimization())); +#define MK_OPT(NAME, VALUE) \ + if (optimizer == NAME) return std::unique_ptr(VALUE) + +std::unique_ptr MetaOptimizer::MakeNewOptimizer( + const string& optimizer) const { + MK_OPT("pruning", new ModelPruner()); + MK_OPT("function", new FunctionOptimizer(cfg_.function_optimization())); + MK_OPT("constfold", new ConstantFolding(cpu_device_)); + MK_OPT("layout", new LayoutOptimizer()); + MK_OPT("memory", new MemoryOptimizer(RewriterConfig::MANUAL)); + MK_OPT("arithmetic", new ArithmeticOptimizer(cfg_.arithmetic_optimization())); + MK_OPT("autoparallel", new AutoParallel(cfg_.auto_parallel().num_replicas())); + MK_OPT("loop", new LoopOptimizer(cfg_.loop_optimization())); + MK_OPT("dependency", new DependencyOptimizer(cfg_.dependency_optimization())); + MK_OPT("debug_stripper", new DebugStripper()); + + return std::unique_ptr(); +} + +#undef MK_OPT + +Status MetaOptimizer::InitializeOptimizers( + std::vector>* optimizers) const { + if (!cfg_.disable_model_pruning()) { + optimizers->emplace_back(new ModelPruner()); } - if (optimizer == "constfold") { - graph_optimizer.reset(new ConstantFolding(cpu_device_)); + if (cfg_.function_optimization() != RewriterConfig::OFF) { + optimizers->emplace_back( + new FunctionOptimizer(cfg_.function_optimization())); } - if (optimizer == "layout") { - graph_optimizer.reset(new LayoutOptimizer()); + if (cfg_.debug_stripper() == RewriterConfig::ON) { + optimizers->emplace_back(new DebugStripper()); } - if (optimizer == "memory") { - graph_optimizer.reset(new MemoryOptimizer(RewriterConfig::MANUAL)); + if (cfg_.constant_folding() != RewriterConfig::OFF) 
{ + optimizers->emplace_back( + new ConstantFolding(cfg_.constant_folding(), cpu_device_)); } - if (optimizer == "arithmetic") { - graph_optimizer.reset( + if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) { + optimizers->emplace_back( new ArithmeticOptimizer(cfg_.arithmetic_optimization())); } - if (optimizer == "autoparallel") { - graph_optimizer.reset( - new AutoParallel(cfg_.auto_parallel().num_replicas())); - } - if (optimizer == "loop") { - graph_optimizer.reset(new LoopOptimizer(cfg_.loop_optimization())); + if (cfg_.loop_optimization() != RewriterConfig::OFF) { + optimizers->emplace_back(new LoopOptimizer(cfg_.loop_optimization())); } - if (optimizer == "dependency") { - graph_optimizer.reset( + if (cfg_.dependency_optimization() != RewriterConfig::OFF) { + optimizers->emplace_back( new DependencyOptimizer(cfg_.dependency_optimization())); } - if (optimizer == "debug_stripper") { - graph_optimizer.reset(new DebugStripper()); + if (cfg_.layout_optimizer() != RewriterConfig::OFF) { + optimizers->emplace_back(new LayoutOptimizer()); + } + if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { + if (cfg_.memory_optimizer_target_node_name_scope().empty()) { + optimizers->emplace_back( + // Use the default target node name prefix "gradients/" + new MemoryOptimizer(cfg_.memory_optimization())); + } else { + optimizers->emplace_back( + new MemoryOptimizer(cfg_.memory_optimization(), + cfg_.memory_optimizer_target_node_name_scope())); + } } - return graph_optimizer; + if (cfg_.auto_parallel().enable()) { + optimizers->emplace_back( + new AutoParallel(cfg_.auto_parallel().num_replicas())); + } + return Status::OK(); } -Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, - GraphDef* optimized_graph) { - std::vector> optimizers; - if (cfg_.optimizers().empty()) { - if (!cfg_.disable_model_pruning()) { - optimizers.push_back(std::unique_ptr(new ModelPruner())); - } - if (cfg_.function_optimization() != RewriterConfig::OFF) { - 
optimizers.push_back(std::unique_ptr( - new FunctionOptimizer(cfg_.function_optimization()))); - } - if (cfg_.debug_stripper() == RewriterConfig::ON) { - optimizers.push_back( - std::unique_ptr(new DebugStripper())); - } - if (cfg_.constant_folding() != RewriterConfig::OFF) { - optimizers.push_back(std::unique_ptr( - new ConstantFolding(cfg_.constant_folding(), cpu_device_))); - } - if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) { - optimizers.push_back(std::unique_ptr( - new ArithmeticOptimizer(cfg_.arithmetic_optimization()))); - } - if (cfg_.loop_optimization() != RewriterConfig::OFF) { - optimizers.push_back(std::unique_ptr( - new LoopOptimizer(cfg_.loop_optimization()))); - } - if (cfg_.dependency_optimization() != RewriterConfig::OFF) { - optimizers.push_back(std::unique_ptr( - new DependencyOptimizer(cfg_.dependency_optimization()))); - } - if (cfg_.layout_optimizer() != RewriterConfig::OFF) { - optimizers.push_back( - std::unique_ptr(new LayoutOptimizer())); - } - if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { - if (cfg_.memory_optimizer_target_node_name_scope().empty()) { - optimizers.push_back(std::unique_ptr( - // Use the default target node name prefix "gradients/" - new MemoryOptimizer(cfg_.memory_optimization()))); - } else { - optimizers.push_back( - std::unique_ptr(new MemoryOptimizer( - cfg_.memory_optimization(), - cfg_.memory_optimizer_target_node_name_scope()))); - } +Status MetaOptimizer::InitializeOptimizersByName( + std::vector>* optimizers) const { + for (const string& optimizer_name : cfg_.optimizers()) { + auto optimizer = MakeNewOptimizer(optimizer_name); + if (optimizer) { + VLOG(2) << "Registered default graph optimizer: " << optimizer_name; + optimizers->push_back(std::move(optimizer)); + continue; } - if (cfg_.auto_parallel().enable()) { - optimizers.push_back(std::unique_ptr( - new AutoParallel(cfg_.auto_parallel().num_replicas()))); + + auto custom_optimizer = + 
CustomGraphOptimizerRegistry::CreateByNameOrNull(optimizer_name); + + if (custom_optimizer) { + VLOG(2) << "Registered custom graph optimizer: " << optimizer_name; + TF_RETURN_IF_ERROR(custom_optimizer->Init()); + optimizers->push_back(std::move(custom_optimizer)); + } else { + VLOG(2) << "Can't register an optimizer by name: " << optimizer_name; } + } + return Status::OK(); +} + +Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { + std::vector> optimizers; + if (cfg_.optimizers().empty()) { + TF_RETURN_IF_ERROR(InitializeOptimizers(&optimizers)); } else { - const std::set available_optimizers = { - "pruning", "function", "constfold", "layout", - "memory", "autoparallel", "arithmetic", "loop", - "dependency", "debug_stripper"}; - std::vector custom_optimizer_names; - for (const auto& optimizer_name : cfg_.optimizers()) { - if (available_optimizers.find(optimizer_name) != - available_optimizers.end()) { - optimizers.push_back(NewOptimizer(optimizer_name)); - } else { - custom_optimizer_names.push_back(optimizer_name); - } - } - // Now run the custom optimizers. - for (const auto& optimizer_name : custom_optimizer_names) { - std::unique_ptr opt = - CustomGraphOptimizerRegistry::CreateByNameOrNull(optimizer_name); - if (opt == nullptr) continue; - TF_RETURN_IF_ERROR(opt->Init()); - optimizers.push_back(std::move(opt)); - } + TF_RETURN_IF_ERROR(InitializeOptimizersByName(&optimizers)); } + VLOG(2) << "Optimize GrapplerItem: item.id=" << item.id + << " num_optimizers=" << optimizers.size(); + if (optimizers.empty()) { + VLOG(3) << "Skip graph optimization, no optimizers registered"; *optimized_graph = item.graph; return Status::OK(); } - // Some optimizers should be run only once. - const std::set run_once_optimizers = {"layout"}; - bool already_optimized = false; - const int num_iterations = - cfg_.meta_optimizer_iterations() == RewriterConfig::DEFAULT_NUM_ITERS - ? 
1 - : cfg_.meta_optimizer_iterations(); + // Invariant: optimized_graph contains the most recently optimized version of + // the graph. GrapplerItem optimized_item = item; optimized_graph->Swap(&optimized_item.graph); - for (int iteration = 0; iteration < num_iterations; ++iteration) { - VLOG(1) << "Starting optimization iteration " << iteration + 1; + + bool is_optimized = false; + GraphOptimizationResult optimization_result(item.id); + + for (int iteration = 0; iteration < NumIterations(cfg_); ++iteration) { + VLOG(4) << "Starting optimization iteration " << iteration + 1; + for (const auto& optimizer : optimizers) { - // Invariant: optimized_graph contains the most recently optimized - // version of the graph. - if (iteration > 0 && run_once_optimizers.count(optimizer->name())) { - continue; - } + // Some optimizers can run only once. + if (iteration > 0 && IsRunOnceOptimizer(optimizer->name())) continue; + uint64 start_us = Env::Default()->NowMicros(); // This swaps the current optimized_graph into optimized item and // resets optimized_graph to an empty graph. 
@@ -195,41 +198,53 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph = GraphDef(); Status status = optimizer->Optimize(cluster, optimized_item, optimized_graph); - uint64 end_us = Env::Default()->NowMicros(); - float duration_ms = (end_us - start_us) / 1000.0f; + string result; if (!status.ok()) { - VLOG(1) << "Not able to apply optimizer " << optimizer->name() << ": " - << status.ToString(); optimized_graph->Swap(&optimized_item.graph); result = status.ToString(); } else { - already_optimized = true; + is_optimized = true; + float duration_ms = (end_us - start_us) / 1000.0f; result = strings::StrCat( - optimizer->name(), ": ", PrintSizesBeforeAfter(optimized_item.graph, *optimized_graph), ", time = ", duration_ms, "ms."); } - result_.emplace_back(optimizer->name(), result); - VLOG(1) << result; + VLOG(4) << optimizer->name() << ": " << result; + + OptimizerResult optimizer_result{optimizer->name(), result}; + optimization_result.results.push_back(optimizer_result); } } - if (already_optimized) { + // Record graph optimization result. + optimization_results_.push_back(optimization_result); + + if (is_optimized) { TF_RETURN_IF_ERROR(TopologicalSort(optimized_graph)); ReassignColocation(optimized_graph); // Make sure that the optimizers preserved the graph version. 
DCHECK_EQ(optimized_graph->versions().producer(), item.graph.versions().producer()); } + + return Status::OK(); +} + +Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { + optimization_results_.clear(); + TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph)); return Status::OK(); } void MetaOptimizer::PrintResult() { - for (const auto& result : result_) { - LOG(INFO) << "Return status of optimizer " << result.first << ": " - << result.second; + for (const GraphOptimizationResult& graph_result : optimization_results_) { + LOG(INFO) << "Optimization results for grappler item: " << graph_result.id; + for (const OptimizerResult& result : graph_result.results) { + LOG(INFO) << " " << result.optimizer_name << ": " << result.result; + } } } diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index 382cfe51d4..b8d4666248 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -30,7 +30,7 @@ class MetaOptimizer : public GraphOptimizer { public: MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg) : cpu_device_(cpu_device), cfg_(cfg) {} - ~MetaOptimizer() override {} + ~MetaOptimizer() override = default; string name() const override { return "meta_optimizer"; }; @@ -43,10 +43,36 @@ class MetaOptimizer : public GraphOptimizer { const GraphDef& optimized_graph, double result) override; private: - std::unique_ptr NewOptimizer(const string& optimizer); + std::unique_ptr MakeNewOptimizer( + const string& optimizer) const; + + // Initialize active optimizers from RewriterConfig toggles. + Status InitializeOptimizers( + std::vector>* optimizers) const; + // Initialize active optimizers from RewriterConfig optimizer names. + Status InitializeOptimizersByName( + std::vector>* optimizers) const; + + // Run optimization pass over a single GrapplerItem. 
Meta optimizer might run + // multiple such passes: 1) for the main graph 2) for the function library + Status OptimizeGraph(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph); + DeviceBase* const cpu_device_; // may be NULL RewriterConfig cfg_; - std::vector> result_; + + struct OptimizerResult { + string optimizer_name; + string result; + }; + + struct GraphOptimizationResult { + explicit GraphOptimizationResult(const string& id) : id(id) {} + string id; + std::vector results; + }; + + std::vector optimization_results_; }; bool MetaOptimizerEnabled(const RewriterConfig& cfg); -- GitLab From 33ffc8e7ff5090b92951c7faac150042dd814085 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 13:08:51 -0700 Subject: [PATCH 220/434] embedding_lookup_sparse documentation change. Remove "(typically from FeatureValueToId)" from args descriptions. This appears to have been an obsolete reference from an ancestor implementation. PiperOrigin-RevId: 194133212 --- tensorflow/python/ops/embedding_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 9e46739bc1..6f2a34c731 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -331,8 +331,8 @@ def embedding_lookup_sparse(params, representing sharded embedding tensors. Alternatively, a `PartitionedVariable`, created by partitioning along dimension 0. Each element must be appropriately sized for the given `partition_strategy`. - sp_ids: N x M `SparseTensor` of int64 ids (typically from FeatureValueToId), - where N is typically batch size and M is arbitrary. + sp_ids: N x M `SparseTensor` of int64 ids where N is typically batch size + and M is arbitrary. sp_weights: either a `SparseTensor` of float / double weights, or `None` to indicate all weights should be taken to be 1. 
If specified, `sp_weights` must have exactly the same shape and indices as `sp_ids`. -- GitLab From 893aa776009418c841d49c924207f3cdaf1d5174 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Tue, 24 Apr 2018 13:13:18 -0700 Subject: [PATCH 221/434] Fixing concurrency issues in RPC factory. PiperOrigin-RevId: 194133903 --- .../contrib/rpc/python/kernel_tests/BUILD | 1 - .../rpc/python/kernel_tests/rpc_op_test.py | 1 + .../python/kernel_tests/rpc_op_test_base.py | 60 ++++--- .../rpc/grpc_rpc_factory.cc | 135 +++++++------- .../rpc/grpc_rpc_factory.h | 18 ++ tensorflow/core/util/rpc/call_container.h | 165 +++++++++++++----- tensorflow/core/util/rpc/rpc_factory.h | 5 +- 7 files changed, 251 insertions(+), 134 deletions(-) diff --git a/tensorflow/contrib/rpc/python/kernel_tests/BUILD b/tensorflow/contrib/rpc/python/kernel_tests/BUILD index f3e6731213..2311c15a68 100644 --- a/tensorflow/contrib/rpc/python/kernel_tests/BUILD +++ b/tensorflow/contrib/rpc/python/kernel_tests/BUILD @@ -28,7 +28,6 @@ py_library( py_library( name = "rpc_op_test_base", srcs = ["rpc_op_test_base.py"], - tags = ["notsan"], deps = [ ":test_example_proto_py", "//tensorflow/contrib/proto", diff --git a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py index e2e0dbc7a2..3fc6bfbb4d 100644 --- a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py +++ b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test.py @@ -35,6 +35,7 @@ class RpcOpTest(test.TestCase, rpc_op_test_base.RpcOpTestBase): _protocol = 'grpc' invalid_method_string = 'Method not found' + connect_failed_string = 'Connect Failed' def __init__(self, methodName='runTest'): # pylint: disable=invalid-name super(RpcOpTest, self).__init__(methodName) diff --git a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py index 89f3ee1a1c..27273d16b1 100644 --- 
a/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py +++ b/tensorflow/contrib/rpc/python/kernel_tests/rpc_op_test_base.py @@ -93,40 +93,39 @@ class RpcOpTestBase(object): response_values = sess.run(response_tensors) self.assertAllEqual(response_values.shape, [0]) - def testInvalidAddresses(self): - with self.test_session() as sess: - with self.assertRaisesOpError(self.invalid_method_string): - sess.run( - self.rpc( - method='/InvalidService.IncrementTestShapes', - address=self._address, - request='')) + def testInvalidMethod(self): + for method in [ + '/InvalidService.IncrementTestShapes', + self.get_method_name('InvalidMethodName') + ]: + with self.test_session() as sess: + with self.assertRaisesOpError(self.invalid_method_string): + sess.run(self.rpc(method=method, address=self._address, request='')) - with self.assertRaisesOpError(self.invalid_method_string): - sess.run( - self.rpc( - method=self.get_method_name('InvalidMethodName'), - address=self._address, - request='')) + _, status_code_value, status_message_value = sess.run( + self.try_rpc(method=method, address=self._address, request='')) + self.assertEqual(errors.UNIMPLEMENTED, status_code_value) + self.assertTrue( + self.invalid_method_string in status_message_value.decode('ascii')) - # This also covers the case of address='' - # and address='localhost:293874293874' + def testInvalidAddress(self): + # This covers the case of address='' and address='localhost:293874293874' + address = 'unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@' + with self.test_session() as sess: with self.assertRaises(errors.UnavailableError): sess.run( self.rpc( method=self.get_method_name('IncrementTestShapes'), - address='unix:/tmp/this_unix_socket_doesnt_exist_97820348!!@', + address=address, request='')) - - # Test invalid method with the TryRpc op _, status_code_value, status_message_value = sess.run( self.try_rpc( - method=self.get_method_name('InvalidMethodName'), - address=self._address, + 
method=self.get_method_name('IncrementTestShapes'), + address=address, request='')) - self.assertEqual(errors.UNIMPLEMENTED, status_code_value) + self.assertEqual(errors.UNAVAILABLE, status_code_value) self.assertTrue( - self.invalid_method_string in status_message_value.decode('ascii')) + self.connect_failed_string in status_message_value.decode('ascii')) def testAlwaysFailingMethod(self): with self.test_session() as sess: @@ -138,6 +137,18 @@ class RpcOpTestBase(object): with self.assertRaisesOpError(I_WARNED_YOU): sess.run(response_tensors) + response_tensors, status_code, status_message = self.try_rpc( + method=self.get_method_name('AlwaysFailWithInvalidArgument'), + address=self._address, + request='') + self.assertEqual(response_tensors.shape, ()) + self.assertEqual(status_code.shape, ()) + self.assertEqual(status_message.shape, ()) + status_code_value, status_message_value = sess.run((status_code, + status_message)) + self.assertEqual(errors.INVALID_ARGUMENT, status_code_value) + self.assertTrue(I_WARNED_YOU in status_message_value.decode('ascii')) + def testSometimesFailingMethodWithManyRequests(self): with self.test_session() as sess: # Fail hard by default. @@ -197,8 +208,7 @@ class RpcOpTestBase(object): address=self._address, request=request_tensors) for _ in range(10) ] - # Launch parallel 10 calls to the RpcOp, each containing - # 20 rpc requests. + # Launch parallel 10 calls to the RpcOp, each containing 20 rpc requests. many_response_values = sess.run(many_response_tensors) self.assertEqual(10, len(many_response_values)) for response_values in many_response_values: diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_rpc_factory.cc b/tensorflow/core/distributed_runtime/rpc/grpc_rpc_factory.cc index d004abd1c1..cde6b785dc 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_rpc_factory.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_rpc_factory.cc @@ -30,7 +30,7 @@ limitations under the License. 
namespace tensorflow { -namespace { +namespace internal { class GrpcCall { public: explicit GrpcCall(CallContainer* container, int index, bool try_rpc, @@ -57,9 +57,10 @@ class GrpcCall { container_->Done(s, index_); } + CallOptions* call_opts() { return &call_opts_; } + int index() { return index_; } const string& request() const { return *request_msg_; } string* response() const { return response_msg_; } - CallOptions* call_opts() { return &call_opts_; } private: CallContainer* const container_; @@ -72,7 +73,9 @@ class GrpcCall { string* status_message_; }; -} // namespace +} // namespace internal + +using internal::GrpcCall; GrpcRPCFactory::GrpcRPCFactory(OpKernelConstruction* ctx, bool fail_fast, int64 timeout_in_ms) @@ -110,28 +113,6 @@ void GrpcRPCFactory::Call(OpKernelContext* ctx, int64 num_elements, Tensor* response_t, Tensor* status_code_t, Tensor* status_message_t, AsyncOpKernel::DoneCallback done) { - auto address = address_t.flat(); - auto method = method_t.flat(); - auto request = request_t.flat(); - - // Stubs are maintained by the GrpcRPCFactory class and will be - // deleted when the class is destroyed. - ::grpc::GenericStub* singleton_stub = nullptr; - if (address.size() == 1) { - singleton_stub = GetOrCreateStubForAddress(address(0)); - } - auto get_stub = [&address, this, - singleton_stub](int64 ix) -> ::grpc::GenericStub* { - return (address.size() > 1) ? GetOrCreateStubForAddress(address(ix)) - : singleton_stub; - }; - auto get_method_ptr = [&method](int64 ix) -> const string* { - return (method.size() > 1) ? &(method(ix)) : &(method(0)); - }; - auto get_request_ptr = [&request](int64 ix) -> const string* { - return (request.size() > 1) ? &(request(ix)) : &(request(0)); - }; - if (try_rpc) { // In this case status_code will never be set in the response, // so we just set it to OK. 
@@ -140,49 +121,22 @@ void GrpcRPCFactory::Call(OpKernelContext* ctx, int64 num_elements, static_cast(errors::Code::OK)); } - CancellationManager* cm = ctx->cancellation_manager(); - CancellationToken cancellation_token = cm->get_cancellation_token(); - - // This object will delete itself when done. - auto* container = - new CallContainer(ctx, num_elements, fail_fast_, try_rpc, - std::move(done), cancellation_token); - - auto response = response_t->flat(); - int32* status_code_ptr = nullptr; - string* status_message_ptr = nullptr; - if (try_rpc) { - status_code_ptr = status_code_t->flat().data(); - status_message_ptr = status_message_t->flat().data(); - } - for (int i = 0; i < num_elements; ++i) { - container->calls()->emplace_back( - container, i, try_rpc, get_request_ptr(i), &response(i), - (try_rpc) ? &status_code_ptr[i] : nullptr, - (try_rpc) ? &status_message_ptr[i] : nullptr); - } + CallContainer::CreateCallFn create_call_fn = + [this, &request_t, &try_rpc, response_t, status_code_t, status_message_t]( + CallContainer* container, int index) { + CreateCall(request_t, try_rpc, index, container, response_t, + status_code_t, status_message_t); + }; - int i = 0; - for (GrpcCall& call : *(container->calls())) { - // This object will delete itself when done. - new RPCState(get_stub(i), &completion_queue_, *get_method_ptr(i), - call.request(), call.response(), - /*done=*/[&call](const Status& s) { call.Done(s); }, - call.call_opts(), fail_fast_, timeout_in_ms_); - ++i; - } + CallContainer::StartCallFn start_call_fn = + [this, &address_t, &method_t](GrpcCall* call) { + StartCall(address_t, method_t, call); + }; - // Need to register this callback after all the RPCs are in - // flight; otherwise we may try to cancel an RPC *before* it - // launches, which is a no-op, and then fall into a deadlock. 
- bool is_cancelled = !cm->RegisterCallback( - cancellation_token, [container]() { container->StartCancel(); }); - - if (is_cancelled) { - ctx->SetStatus(errors::Cancelled("Operation has been cancelled.")); - // container's reference counter will take care of calling done(). - container->StartCancel(); - } + // This object will delete itself when done. + new CallContainer(ctx, num_elements, fail_fast_, try_rpc, + std::move(done), std::move(create_call_fn), + std::move(start_call_fn)); } ::grpc::GenericStub* GrpcRPCFactory::GetOrCreateStubForAddress( @@ -210,4 +164,53 @@ GrpcRPCFactory::ChannelPtr GrpcRPCFactory::CreateChannelForAddress( /*target=*/address, ::grpc::InsecureChannelCredentials(), args); } +void GrpcRPCFactory::CreateCall(const Tensor& request_t, const bool try_rpc, + int index, CallContainer* container, + Tensor* response_t, Tensor* status_code_t, + Tensor* status_message_t) { + auto request = request_t.flat(); + auto get_request_ptr = [&request](int64 ix) -> const string* { + return (request.size() > 1) ? &(request(ix)) : &(request(0)); + }; + auto response = response_t->flat(); + int32* status_code_ptr = nullptr; + string* status_message_ptr = nullptr; + if (try_rpc) { + status_code_ptr = status_code_t->flat().data(); + status_message_ptr = status_message_t->flat().data(); + } + container->RegisterCall(container, index, try_rpc, get_request_ptr(index), + &response(index), + (try_rpc) ? &status_code_ptr[index] : nullptr, + (try_rpc) ? &status_message_ptr[index] : nullptr); +} + +void GrpcRPCFactory::StartCall(const Tensor& address_t, const Tensor& method_t, + GrpcCall* call) { + auto address = address_t.flat(); + auto method = method_t.flat(); + // Stubs are maintained by the GrpcRPCFactory class and will be + // deleted when the class is destroyed. 
+ ::grpc::GenericStub* singleton_stub = nullptr; + if (address.size() == 1) { + singleton_stub = GetOrCreateStubForAddress(address(0)); + } + auto get_stub = [&address, this, + singleton_stub](int64 ix) -> ::grpc::GenericStub* { + return (address.size() > 1) ? GetOrCreateStubForAddress(address(ix)) + : singleton_stub; + }; + auto get_method_ptr = [&method](int64 ix) -> const string* { + return (method.size() > 1) ? &(method(ix)) : &(method(0)); + }; + + int index = call->index(); + // This object will delete itself when done. + new RPCState(get_stub(index), &completion_queue_, + *get_method_ptr(index), call->request(), + call->response(), + /*done=*/[call](const Status& s) { call->Done(s); }, + call->call_opts(), fail_fast_, timeout_in_ms_); +} + } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_rpc_factory.h b/tensorflow/core/distributed_runtime/rpc/grpc_rpc_factory.h index 34ec235aaf..29394c84b5 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_rpc_factory.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_rpc_factory.h @@ -20,10 +20,16 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/util/rpc/call_container.h" #include "tensorflow/core/util/rpc/rpc_factory.h" namespace tensorflow { +// Forward declaration of GrpcCall. +namespace internal { +class GrpcCall; +} // namespace internal + class GrpcRPCFactory : public RPCFactory { public: explicit GrpcRPCFactory(OpKernelConstruction* ctx, bool fail_fast, @@ -42,6 +48,18 @@ class GrpcRPCFactory : public RPCFactory { virtual ChannelPtr CreateChannelForAddress(const string& address); private: + // Creates a call and registers it with given `container`. The `index` is used + // to index into the tensor arguments. 
+ void CreateCall(const Tensor& request_t, const bool try_rpc, int index, + CallContainer* container, + Tensor* response_t, Tensor* status_code_t, + Tensor* status_message_t); + + // Asynchronously invokes the given `call`. The call completion is handled + // by the call container the call was previously registered with. + void StartCall(const Tensor& address_t, const Tensor& method_t, + internal::GrpcCall* call); + ::grpc::GenericStub* GetOrCreateStubForAddress(const string& address); bool fail_fast_; diff --git a/tensorflow/core/util/rpc/call_container.h b/tensorflow/core/util/rpc/call_container.h index 7f36056797..e1226a7f16 100644 --- a/tensorflow/core/util/rpc/call_container.h +++ b/tensorflow/core/util/rpc/call_container.h @@ -26,53 +26,60 @@ limitations under the License. namespace tensorflow { -template +namespace internal { +// The following class is used for coordination between a `CallContainer` +// instance and a cancellation callback to make sure that the `CallContainer` +// instance waits for the cancellation callback to be destroyed (either because +// a cancellation occurred or because the callback was deregistered) before +// deleting itself. Without this coordination the cancellation callback could +// attempt to access a `CallContainer` instance that is no longer valid. +class NotifyWhenDestroyed { + public: + explicit NotifyWhenDestroyed(std::shared_ptr notification) + : notification_(std::move(notification)) {} + + ~NotifyWhenDestroyed() { notification_->Notify(); } + + private: + std::shared_ptr notification_; +}; +} // namespace internal + +// The following class is responsible for the life cycle management of a set of +// RPC calls. The calls are started when an instance of the class is created and +// the class contract guarantees to invoke a "done" callback provided by the +// caller when all RPC calls have either completed or been cancelled. 
+// +// The caller should not make any assumptions about the validity of an instance +// of this class after the provided callback has been invoked, which may be +// immediately after the instance was created. +template class CallContainer { public: + typedef std::function*, int)> CreateCallFn; + typedef std::function StartCallFn; + + // Uses the provided `create_call_fn` and `start_call_fn` functions to create + // and start a set of RPC calls. When all RPC calls have either completed or + // been cancelled, the `done` callback is invoked. The caller should not make + // any assumptions about the validity of the created instance as the instance + // will delete itself after invoking the `done` callback. explicit CallContainer(OpKernelContext* ctx, int num_calls, bool fail_fast, bool try_rpc, AsyncOpKernel::DoneCallback done, - CancellationToken token) - : ctx_(ctx), - done_(std::move(done)), - token_(token), - fail_fast_(fail_fast), - try_rpc_(try_rpc) { - CHECK_GT(num_calls, 0); - - // This will run when all RPCs are finished. - reffed_status_callback_ = new ReffedStatusCallback([this](const Status& s) { - ctx_->cancellation_manager()->DeregisterCallback(token_); - ctx_->SetStatus(s); - done_(); - delete this; - }); - - // Subtract reference count from the initial creation. - core::ScopedUnref unref(reffed_status_callback_); - - for (int i = 0; i < num_calls; ++i) { - // Increase the reference on the callback for each new RPC. - reffed_status_callback_->Ref(); - } - } + CreateCallFn create_call_fn, + StartCallFn start_call_fn); - std::list* calls() { return &calls_; } + // Registers a call with this container. This method expects its arguments to + // match those of a `Call` constructor as it forwards them to an underlying + // collection, which creates a `Call` instance in place. + template + void RegisterCall(Args&&... 
args); - void StartCancel() { - // Once this loop is done, can no longer assume anything is valid - // because "delete this" may have been immediately called. - // Nothing should run after this loop. - for (auto& call : calls_) { - call.StartCancel(); - } - } + // Starts the cancellation of all RPC calls managed by this container. + void StartCancel(); - void Done(const Status& s, int index) { - if (!try_rpc_) { - reffed_status_callback_->UpdateStatus(s); - } - reffed_status_callback_->Unref(); - } + // Indicates that the `index`-th RPC call has finished. + void Done(const Status& s, int index); private: OpKernelContext* ctx_; @@ -81,10 +88,88 @@ class CallContainer { const CancellationToken token_; const bool fail_fast_; const bool try_rpc_; + std::shared_ptr callback_destroyed_; // Performs its own reference counting. ReffedStatusCallback* reffed_status_callback_; }; +template +CallContainer::CallContainer( + OpKernelContext* ctx, int num_calls, bool fail_fast, bool try_rpc, + AsyncOpKernel::DoneCallback done, + typename CallContainer::CreateCallFn create_call_fn, + typename CallContainer::StartCallFn start_call_fn) + : ctx_(ctx), + done_(std::move(done)), + token_(ctx->cancellation_manager()->get_cancellation_token()), + fail_fast_(fail_fast), + try_rpc_(try_rpc), + callback_destroyed_(new Notification) { + CHECK_GT(num_calls, 0); + + // This will run when all RPCs are finished. + reffed_status_callback_ = new ReffedStatusCallback([this](const Status& s) { + ctx_->cancellation_manager()->DeregisterCallback(token_); + ctx_->SetStatus(s); + done_(); + callback_destroyed_->WaitForNotification(); + delete this; + }); + + // The cancellation callback needs to be registered before the RPC calls are + // started to make sure that the callback is properly cleaned up by the + // `reffed_status_callback` when all calls complete. At the same time, the + // cancellation callback should wait for the RPC calls to be started for the + // cancellation to take effect. 
+ std::shared_ptr notify_when_destroyed( + new internal::NotifyWhenDestroyed(callback_destroyed_)); + std::shared_ptr calls_started(new Notification); + bool is_cancelled = !ctx_->cancellation_manager()->RegisterCallback( + token_, [this, calls_started, notify_when_destroyed]() { + calls_started->WaitForNotification(); + StartCancel(); + }); + + for (int i = 0; i < num_calls; ++i) { + create_call_fn(this, i); + // Increase the reference on the callback for each new RPC. + reffed_status_callback_->Ref(); + } + for (Call& call : calls_) { + start_call_fn(&call); + } + calls_started->Notify(); + + if (is_cancelled) { + ctx_->SetStatus(errors::Cancelled("Operation has been cancelled.")); + StartCancel(); + } + + // Subtract reference count from the initial creation. + reffed_status_callback_->Unref(); +} + +template +template +void CallContainer::RegisterCall(Args&&... args) { + calls_.emplace_back(std::forward(args)...); +} + +template +void CallContainer::StartCancel() { + for (auto& call : calls_) { + call.StartCancel(); + } +} + +template +void CallContainer::Done(const Status& s, int index) { + if (!try_rpc_) { + reffed_status_callback_->UpdateStatus(s); + } + reffed_status_callback_->Unref(); +} + } // namespace tensorflow #endif // TENSORFLOW_CORE_UTIL_RPC_CALL_CONTAINER_H_ diff --git a/tensorflow/core/util/rpc/rpc_factory.h b/tensorflow/core/util/rpc/rpc_factory.h index 9bf078c0f4..c4eaaf4457 100644 --- a/tensorflow/core/util/rpc/rpc_factory.h +++ b/tensorflow/core/util/rpc/rpc_factory.h @@ -32,10 +32,11 @@ class RPCFactory { RPCFactory() {} virtual ~RPCFactory() {} - // Start a Call() to methods `method_t` at addresses `address_t` with + // Asynchronously invokes methods `method_t` at addresses `address_t` with // request strings from `request_t`. Any of these may be scalar // Tensors, in which case the operands are broadcasted. - // Upon completion of all requests, `response_t` will be populated. 
+ // Upon completion of all requests, `response_t` will be populated and the + // `done` callback will be invoked. // // If `try_rpc` is `true`, then `status_message_t` and // `status_code_t` will be populated as well. -- GitLab From 4355b923c273a4e07655f860a95428b2db977741 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 13:21:49 -0700 Subject: [PATCH 222/434] Implement hoisting of common prefix of unary ops to concat. PiperOrigin-RevId: 194135148 --- tensorflow/core/grappler/op_types.cc | 113 ++++++++--- tensorflow/core/grappler/op_types.h | 2 + .../optimizers/arithmetic_optimizer.cc | 187 +++++++++++++++++- .../optimizers/arithmetic_optimizer.h | 5 + .../optimizers/arithmetic_optimizer_test.cc | 102 ++++++++++ 5 files changed, 378 insertions(+), 31 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 9c45aed62f..f595cf6456 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" namespace tensorflow { namespace grappler { @@ -451,43 +452,101 @@ OPDEF_PROPERTY_HELPER(Aggregate, aggregate) OPDEF_PROPERTY_HELPER(Commutative, commutative) bool IsInvolution(const NodeDef& node) { - const std::unordered_set involution_ops{ - "Conj", "Reciprocal", "Invert", "Neg", "LogicalNot"}; - return involution_ops.count(node.op()) > 0; + static const std::unordered_set* involution_ops = + CHECK_NOTNULL((new std::unordered_set{ + "Conj", "Reciprocal", "Invert", "Neg", "LogicalNot"})); + return involution_ops->count(node.op()) > 0; } bool IsValueAndOrderPreserving(const NodeDef& node) { if (NumNonControlInputs(node) == 1 && IsAggregate(node)) { return true; } - const std::unordered_set value_and_order_preserving_ops{ - "CheckNumerics", - "DebugGradientIdentity", - "DeepCopy" - "Enter", - "Exit", - "ExpandDims", - "Identity", - "IdentityN", - "PreventGradient", - "Print", - "Reshape", - "Snapshot", - "Squeeze", - "StopGradient", - }; - return value_and_order_preserving_ops.count(node.op()) > 0; + static const std::unordered_set* value_and_order_preserving_ops = + CHECK_NOTNULL((new const std::unordered_set{ + "CheckNumerics", + "DebugGradientIdentity", + "DeepCopy" + "Enter", + "Exit", + "ExpandDims", + "Identity", + "IdentityN", + "PreventGradient", + "Print", + "Reshape", + "Snapshot", + "Squeeze", + "StopGradient", + })); + return value_and_order_preserving_ops->count(node.op()) > 0; } bool IsValuePreserving(const NodeDef& node) { - const std::unordered_set value_preserving_ops{ - "InvertPermutation", - "Reverse", - "Roll", - "Transpose", - }; + static const std::unordered_set* value_preserving_ops = + CHECK_NOTNULL((new std::unordered_set{ + "InvertPermutation", + "Reverse", + "Roll", + "Transpose", + })); return IsValueAndOrderPreserving(node) || - 
value_preserving_ops.count(node.op()) > 0; + value_preserving_ops->count(node.op()) > 0; +} + +bool IsUnaryElementWise(const NodeDef& node) { + static const std::unordered_set* element_wise_ops = + CHECK_NOTNULL((new std::unordered_set{ + "Abs", + "Acos", + "Acosh", + "Asin", + "Asinh", + "Atan", + "Atan2", + "Atanh", + "Ceil", + "ComplexAbs", + "Conj", + "Cos", + "Cosh", + "Digamma", + "Elu" + "Erf", + "Erfc", + "Exp", + "Expm1", + "Floor", + "Inv", + "Invert", + "Isinf", + "Isnan", + "Isfinite", + "Lgamma", + "Log", + "Log1p", + "LogicalNot", + "Neg", + "Reciprocal", + "Relu", + "Relu6", + "Rint", + "Round", + "Selu", + "Rsqrt", + "Sigmoid", + "Sign", + "Sin", + "SinH", + "Softplus", + "Softsign", + "Sqrt", + "Square", + "Tan" + "Tanh", + })); + return element_wise_ops->count(node.op()) > 0 || + (!IsIdentityN(node) && IsValueAndOrderPreserving(node)); } bool HasOpDef(const NodeDef& node) { diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 79fd05e187..7f5da19d90 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -177,6 +177,8 @@ bool IsValueAndOrderPreserving(const NodeDef& node); // function returns true if the op commutes with all element-wise operations. bool IsValuePreserving(const NodeDef& node); +bool IsUnaryElementWise(const NodeDef& node); + // Returns true if we can find an opdef corresponding to the op of the node. bool HasOpDef(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index ed199c1ac8..866b993e93 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1340,6 +1340,182 @@ class RemoveNegationStage : public ArithmeticOptimizerStage { } }; +// This optimization hoists the common prefix of unary ops of the inputs to +// concat out of the concat. 
+// For example: Concat([Exp(Sin(x)), Exp(Sin(y)), Exp(Sin(z))]) -> +// Exp(Sin(Concat([x, y, z]))). +// TODO(rmlarsen): Support casting. We would have to change the type attribute +// on the concat node. +class HoistCWiseUnaryFromConcatStage : public ArithmeticOptimizerStage { + public: + explicit HoistCWiseUnaryFromConcatStage( + const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("", ctx, ctx_ext) {} + + ~HoistCWiseUnaryFromConcatStage() override = default; + + bool IsSupported(const NodeDef* node) const override { + if (!IsConcat(*node)) return false; + const int n = node->attr().at("N").i(); + return n > 1; + } + + Status TrySimplify(NodeDef* concat_node, + string* simplified_node_name) override { + int prefix_length; + std::set ctrl_inputs; + TF_RETURN_IF_ERROR( + FindCommonUnaryOpPrefix(*concat_node, &prefix_length, &ctrl_inputs)); + if (prefix_length > 0) { + TF_RETURN_IF_ERROR( + HoistUnaryOpPrefix(prefix_length, &ctrl_inputs, concat_node)); + AddToOptimizationQueue(concat_node); + } + return Status::OK(); + } + + private: + void RemoveControlInputs(std::set* removed_ctrl_inputs, + NodeDef* node) const { + const int num_inputs = node->input_size(); + for (int idx = num_inputs - 1; idx >= 0; --idx) { + const string& input = node->input(idx); + if (IsControlInput(input)) { + removed_ctrl_inputs->insert(input); + ctx().node_map->RemoveOutput(NodeName(input), node->name()); + node->mutable_input()->RemoveLast(); + } else { + break; + } + } + } + + void AddControlInputs(std::set* new_ctrl_inputs, + NodeDef* node) const { + for (int idx = node->input_size() - 1; idx >= 0; --idx) { + const string& existing_input = node->input(idx); + if (IsControlInput(existing_input)) { + new_ctrl_inputs->erase(existing_input); + } else { + break; + } + } + for (const string& new_input : *new_ctrl_inputs) { + ctx().node_map->AddOutput(NodeName(new_input), node->name()); + node->add_input(new_input); + } + } + + // Returns 
the length of the common unary prefix chain of ops that can be + // hoisted out of concat. + Status FindCommonUnaryOpPrefix(const NodeDef& concat_node, int* prefix_length, + std::set* ctrl_inputs) const { + *prefix_length = 0; + const int n = concat_node.attr().at("N").i(); + // Follow the chains backwards from each concat input as long as all the + // following conditions hold: + // 1. The ops in all chains are the same. + // 2. The op is a unary elemenwise op. + // 3. The op output has only a single consumer. + std::vector tail(n, nullptr); + const int start = concat_node.op() == "Concat" ? 1 : 0; + const int end = start + n; + // Set up tail pointers to point to the immediate inputs to Concat. + for (int i = start; i < end; ++i) { + if (IsControlInput(concat_node.input(i))) { + return errors::FailedPrecondition("Got control input ", + concat_node.input(i), + " where normal input was expected."); + } + TF_RETURN_IF_ERROR(GetInputNode(concat_node.input(i), &tail[i - start])); + } + + bool stop = false; + ctrl_inputs->clear(); + while (!stop) { + const NodeDef* tail0 = tail[0]; + if (!IsUnaryElementWise(*tail0)) break; + for (int chain = 0; chain < n; ++chain) { + // TODO(rmlarsen): Allow and hoist outgoing control edges. + if (tail[chain]->op() != tail0->op() || + ctx().node_map->GetOutputs(tail[chain]->name()).size() > 1) { + stop = true; + break; + } + } + if (stop) break; + // We found one more op that can be hoisted. + ++(*prefix_length); + for (int chain = 0; chain < n; ++chain) { + RemoveControlInputs(ctrl_inputs, tail[chain]); + } + // Advance tail pointers to the next level. 
+ for (int chain = 0; chain < n; ++chain) { + if (tail[chain]->input_size() == 0 || + IsControlInput(tail[chain]->input(0))) { + stop = true; + break; + } else { + NodeDef* new_tail = nullptr; + TF_RETURN_IF_ERROR(GetInputNode(tail[chain]->input(0), &new_tail)); + tail[chain] = new_tail; + } + } + } + return Status::OK(); + } + + Status HoistUnaryOpPrefix(const int prefix_length, + std::set* ctrl_inputs, + NodeDef* concat_node) { + const int n = concat_node->attr().at("N").i(); + const int start = concat_node->op() == "Concat" ? 1 : 0; + const int end = start + n; + const std::set consumers = + ctx().node_map->GetOutputs(concat_node->name()); + AddControlInputs(ctrl_inputs, concat_node); + for (int chain = 0; chain < (end - start); ++chain) { + NodeDef* tail = nullptr; + const string concat_input = concat_node->input(chain + start); + for (int distance = 0; distance < prefix_length; ++distance) { + if (distance == 0) { + TF_RETURN_IF_ERROR(GetInputNode(concat_input, &tail)); + } else { + TF_RETURN_IF_ERROR(GetInputNode(tail->input(0), &tail)); + } + } + + // Hook the node following tail directly into the concat node. + const string tail_input = tail->input(0); + concat_node->set_input(chain + start, tail_input); + ctx().node_map->UpdateInput(concat_node->name(), concat_input, + tail_input); + + if (chain == 0) { + // Reuse nodes in the first chain to process output of concat. + tail->set_input(0, concat_node->name()); + ctx().node_map->UpdateInput(tail->name(), tail_input, + concat_node->name()); + + // Update the consumers of concat to consume the end of the chain + // instead. 
+ for (NodeDef* consumer : consumers) { + for (int idx = 0; idx < consumer->input_size(); ++idx) { + if (consumer->input(idx) == concat_node->name()) { + consumer->set_input(idx, concat_input); + ctx().node_map->UpdateInput(consumer->name(), concat_node->name(), + concat_input); + } + } + AddToOptimizationQueue(consumer); + } + } + } + return Status::OK(); + } +}; + } // namespace class UniqueNodes { @@ -1995,6 +2171,8 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) { pipeline.AddStage(ctx, ctx_ext); if (options_.remove_negation) pipeline.AddStage(ctx, ctx_ext); + if (options_.hoist_unary_out_of_concat) + pipeline.AddStage(ctx, ctx_ext); VLOG(1) << "Run " << pipeline.NumStages() << " arithmetic optimizer stages: " << str_util::Join(pipeline.StageNames(), ", "); @@ -2062,17 +2240,18 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, nodes_to_preserve_ = item.NodesToPreserve(); fetch_nodes_known_ = !item.fetch.empty(); *optimized_graph = item.graph; - optimized_graph_ = optimized_graph; + GrapplerItem optimized_item(item, optimized_graph); + optimized_graph_ = &optimized_item.graph; node_map_.reset(new NodeMap(optimized_graph_)); - DedupComputations(); + if (options_.dedup_computations) { + DedupComputations(); + } // Perform topological sort on the graph in order to help AddOpsRewrite to // optimize larger subgraphs starting from the roots with more inputs. 
TF_RETURN_IF_ERROR(TopologicalSort(optimized_graph_)); - GrapplerItem optimized_item(item, optimized_graph); - optimized_graph_ = &optimized_item.graph; graph_properties_.reset(new GraphProperties(optimized_item)); const Status status = graph_properties_->InferStatically(false); const bool can_use_shapes = status.ok(); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 344c8281eb..375f13acc1 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -56,6 +56,7 @@ class ArithmeticOptimizer : public GraphOptimizer { struct ArithmeticOptimizerOptions { // TODO(ezhulenev): flag do disable TrySimplifyAndReplaceUses in tests. // Remove when all optimizers will be migrated to separate stages. + bool dedup_computations = true; bool enable_try_simplify_and_replace = true; bool combine_add_to_addn = true; bool hoist_common_factor_out_of_aggregation = true; @@ -64,12 +65,16 @@ class ArithmeticOptimizer : public GraphOptimizer { bool remove_redundant_bitcast = true; bool remove_redundant_cast = true; bool remove_negation = true; + bool hoist_unary_out_of_concat = false; // Choose which arithmetic optimizer stages will be enabled for a given // optimization level by default. 
static ArithmeticOptimizerOptions Default( RewriterConfig::Toggle opt_level) { ArithmeticOptimizerOptions options; + if (opt_level == RewriterConfig::AGGRESSIVE) { + options.hoist_unary_out_of_concat = true; + } return options; } }; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index cb1f2ea732..df10dbdf48 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -98,6 +98,7 @@ class ArithmeticOptimizerTest : public GrapplerTest { // should explicitly enable required optimization for tests isolation void DisableAllStages(ArithmeticOptimizer* optimizer) { ArithmeticOptimizer::ArithmeticOptimizerOptions options; + options.dedup_computations = false; options.enable_try_simplify_and_replace = false; options.combine_add_to_addn = false; options.hoist_common_factor_out_of_aggregation = false; @@ -147,6 +148,10 @@ class ArithmeticOptimizerTest : public GrapplerTest { DisableAllStages(optimizer); optimizer->options_.remove_negation = true; } + void EnableOnlyHoistCWiseUnaryFromConcat(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.hoist_unary_out_of_concat = true; + } }; TEST_F(ArithmeticOptimizerTest, NoOp) { @@ -2086,5 +2091,102 @@ TEST_F(ArithmeticOptimizerTest, MinimizeBroadcasts_BuildTreeUp) { EXPECT_EQ("mul1", mul3_node->input(1)); } +TEST_F(ArithmeticOptimizerTest, HoistCWiseUnaryFromConcat) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output a = ops::Variable(s.WithOpName("a"), {32}, DT_FLOAT); + Output b = ops::Variable(s.WithOpName("b"), {32}, DT_FLOAT); + Output c = ops::Variable(s.WithOpName("c"), {32}, DT_FLOAT); + Output axis = ops::Const(s.WithOpName("axis"), 0, {}); + Output ctrl1 = ops::Const(s.WithOpName("ctrl1"), 1, {}); + Output ctrl2 = ops::Const(s.WithOpName("ctrl2"), 2, {}); + Output ctrl3 = 
ops::Const(s.WithOpName("ctrl3"), 3, {}); + // Test case with chains of length 1. + Output sin_a = + ops::Sin(s.WithOpName("sin_a").WithControlDependencies(ctrl3), a); + Output exp_a = + ops::Exp(s.WithOpName("exp_a").WithControlDependencies(ctrl1), sin_a); + Output exp_b = ops::Exp(s.WithOpName("exp_b"), b); + Output exp_c = + ops::Exp(s.WithOpName("exp_c").WithControlDependencies(ctrl2), c); + Output concat = + ops::Concat(s.WithOpName("concat"), {exp_a, exp_b, exp_c}, axis); + Output id = ops::Identity(s.WithOpName("id"), concat); + + // Test case with chains of length 2. + Output exp_a2 = + ops::Exp(s.WithOpName("exp_a2").WithControlDependencies(ctrl1), sin_a); + Output exp_b2 = ops::Exp(s.WithOpName("exp_b2"), b); + Output exp_c2 = + ops::Exp(s.WithOpName("exp_c2").WithControlDependencies(ctrl2), c); + Output cos_exp_a2 = ops::Cos( + s.WithOpName("cos_exp_a2").WithControlDependencies(ctrl1), exp_a2); + Output cos_exp_b2 = ops::Cos( + s.WithOpName("cos_exp_b2").WithControlDependencies(ctrl3), exp_b2); + Output cos_exp_c2 = ops::Cos(s.WithOpName("cos_exp_c2"), exp_c2); + Output concat2 = ops::Concat(s.WithOpName("concat2"), + {cos_exp_a2, cos_exp_b2, cos_exp_c2}, axis); + Output id2 = ops::Identity(s.WithOpName("id2"), concat2); + GrapplerItem item; + item.fetch = {"id", "id2"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + ArithmeticOptimizer optimizer; + EnableOnlyHoistCWiseUnaryFromConcat(&optimizer); + + OptimizeAndPrune(&optimizer, &item, &output); + int found = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "concat") { + EXPECT_EQ(6, node.input_size()); + EXPECT_EQ("sin_a", node.input(0)); + EXPECT_EQ("b", node.input(1)); + EXPECT_EQ("c", node.input(2)); + EXPECT_EQ("axis", node.input(3)); + EXPECT_EQ("^ctrl1", node.input(4)); + EXPECT_EQ("^ctrl2", node.input(5)); + found++; + } + if (node.name() == "exp_a") { + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("concat", node.input(0)); + found++; + } + if (node.name() 
== "id") { + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("exp_a", node.input(0)); + found++; + } + + if (node.name() == "concat2") { + EXPECT_EQ(7, node.input_size()); + EXPECT_EQ("sin_a", node.input(0)); + EXPECT_EQ("b", node.input(1)); + EXPECT_EQ("c", node.input(2)); + EXPECT_EQ("axis", node.input(3)); + EXPECT_EQ("^ctrl1", node.input(4)); + EXPECT_EQ("^ctrl2", node.input(5)); + EXPECT_EQ("^ctrl3", node.input(6)); + found++; + } + if (node.name() == "exp_a2") { + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("concat2", node.input(0)); + found++; + } + if (node.name() == "cos_exp_a2") { + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("exp_a2", node.input(0)); + found++; + } + if (node.name() == "id2") { + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("cos_exp_a2", node.input(0)); + found++; + } + } + EXPECT_EQ(7, found); +} + } // namespace grappler } // namespace tensorflow -- GitLab From a3691c4af225126e14b0df1f30969899b33de243 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 13:47:35 -0700 Subject: [PATCH 223/434] - Add a way to specify custom updater args to updaters in the optimizer. - Create RegAdagradOptimizer which allows the user to specify whether a gradient update is allowed to update the slot vars. 
PiperOrigin-RevId: 194139121 --- tensorflow/contrib/opt/BUILD | 20 + .../python/training/reg_adagrad_optimizer.py | 107 ++++++ .../training/reg_adagrad_optimizer_test.py | 343 ++++++++++++++++++ 3 files changed, 470 insertions(+) create mode 100644 tensorflow/contrib/opt/python/training/reg_adagrad_optimizer.py create mode 100644 tensorflow/contrib/opt/python/training/reg_adagrad_optimizer_test.py diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 612ecc3e63..13aa1d7e7a 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -25,6 +25,7 @@ py_library( "python/training/multitask_optimizer_wrapper.py", "python/training/nadam_optimizer.py", "python/training/powersign.py", + "python/training/reg_adagrad_optimizer.py", "python/training/sign_decay.py", "python/training/variable_clipping_optimizer.py", ], @@ -155,6 +156,25 @@ py_test( ], ) +py_test( + name = "reg_adagrad_optimizer_test", + srcs = ["python/training/reg_adagrad_optimizer_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:embedding_ops", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + "//third_party/py/numpy", + ], +) + py_test( name = "nadam_optimizer_test", srcs = ["python/training/nadam_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/python/training/reg_adagrad_optimizer.py b/tensorflow/contrib/opt/python/training/reg_adagrad_optimizer.py new file mode 100644 index 0000000000..d0e0405a2c --- /dev/null +++ b/tensorflow/contrib/opt/python/training/reg_adagrad_optimizer.py @@ -0,0 +1,107 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""RegAdagrad for TensorFlow.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.ops import math_ops +from tensorflow.python.training import adagrad +from tensorflow.python.training import training_ops +from tensorflow.python.util import tf_contextlib + + +class RegAdagradOptimizer(adagrad.AdagradOptimizer): + """RegAdagrad: Adagrad with updates that optionally skip updating the slots. + + This is meant to address the problem of additional regularization terms in the + loss function affecting learning rate decay and causing hyper-param + entanglement. Example usage: + + loss = tf.nn.cross_entropy(x, labels) + reg_loss = reg_strength * tf.reduce_sum(x * x) + opt = tf.contrib.opt.RegAdagradOptimizer(learning_rate) + loss_update = opt.minimize(loss) + with opt.avoid_updating_slots(): + reg_update = opt.minimize(reg_loss) + total_update = tf.group([loss_update, reg_update]) + + # ... + + sess.run(total_update, ...) 
+ """ + + def __init__(self, + learning_rate, + initial_accumulator_value=0.1, + use_locking=False, + name="RegAdagrad"): + super(RegAdagradOptimizer, self).__init__( + learning_rate, + initial_accumulator_value=initial_accumulator_value, + use_locking=use_locking, + name=name) + self._should_update_slots = True + + @tf_contextlib.contextmanager + def avoid_updating_slots(self): + old = self._should_update_slots + self._should_update_slots = False + try: + yield + finally: + self._should_update_slots = old + + def _apply_dense(self, grad, var): + acc = self.get_slot(var, "accumulator") + return training_ops.apply_adagrad( + var, + acc, + math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), + grad, + use_locking=self._use_locking, + update_slots=self._should_update_slots) + + def _resource_apply_dense(self, grad, var, update_slots=True): + acc = self.get_slot(var, "accumulator") + return training_ops.resource_apply_adagrad( + var.handle, + acc.handle, + math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype), + grad, + use_locking=self._use_locking, + update_slots=self._should_update_slots) + + def _apply_sparse(self, grad, var, update_slots=True): + acc = self.get_slot(var, "accumulator") + return training_ops.sparse_apply_adagrad( + var, + acc, + math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), + grad.values, + grad.indices, + use_locking=self._use_locking, + update_slots=self._should_update_slots) + + def _resource_apply_sparse(self, grad, var, indices, update_slots=True): + acc = self.get_slot(var, "accumulator") + return training_ops.resource_sparse_apply_adagrad( + var.handle, + acc.handle, + math_ops.cast(self._learning_rate_tensor, grad.dtype), + grad, + indices, + use_locking=self._use_locking, + update_slots=self._should_update_slots) diff --git a/tensorflow/contrib/opt/python/training/reg_adagrad_optimizer_test.py b/tensorflow/contrib/opt/python/training/reg_adagrad_optimizer_test.py new file mode 100644 index 
0000000000..ea56e1646a --- /dev/null +++ b/tensorflow/contrib/opt/python/training/reg_adagrad_optimizer_test.py @@ -0,0 +1,343 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for Regreg_adagrad_optimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.opt.python.training import reg_adagrad_optimizer +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +class RegAdagradOptimizerTest(test.TestCase): + + def doTestBasic(self, use_locking=False, use_resource=False): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + if use_resource: + var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) + var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) + else: + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 
4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + ada_opt = reg_adagrad_optimizer.RegAdagradOptimizer( + 3.0, initial_accumulator_value=0.1, use_locking=use_locking) + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Run 3 steps of adagrad + for _ in range(3): + ada_update.run() + # Validate updated params + self.assertAllCloseAccordingToType( + np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([2.715679168701172, 3.715679168701172]), var1.eval()) + + def testBasic(self): + self.doTestBasic(use_locking=False) + + def testBasicResource(self): + self.doTestBasic(use_locking=False, use_resource=True) + + def testBasicLocked(self): + self.doTestBasic(use_locking=True) + + def testMinimizeSparseResourceVariable(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = resource_variable_ops.ResourceVariable( + [[1.0, 2.0], [3.0, 4.0]], dtype=dtype) + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) + loss = pred * pred + sgd_op = reg_adagrad_optimizer.RegAdagradOptimizer(1.0).minimize(loss) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]], + var0.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + self.assertAllCloseAccordingToType( + [[0, 1], [3, 4]], var0.eval(), atol=0.01) + + def testTensorLearningRate(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + 
var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + ada_opt = reg_adagrad_optimizer.RegAdagradOptimizer( + constant_op.constant(3.0), initial_accumulator_value=0.1) + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Run 3 steps of adagrad + for _ in range(3): + ada_update.run() + # Validate updated params + self.assertAllCloseAccordingToType( + np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([2.715679168701172, 3.715679168701172]), var1.eval()) + + def testSparseBasic(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) + var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant([0.1], shape=[1, 1], dtype=dtype), + constant_op.constant([0]), constant_op.constant([2, 1])) + grads1 = ops.IndexedSlices( + constant_op.constant([0.01], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), constant_op.constant([2, 1])) + ada_opt = reg_adagrad_optimizer.RegAdagradOptimizer( + 3.0, initial_accumulator_value=0.1) + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllClose([[1.0], [2.0]], var0.eval()) + self.assertAllClose([[3.0], [4.0]], var1.eval()) + # Run 3 step of sgd + for _ in range(3): + ada_update.run() + # Validate updated params + self.assertAllCloseAccordingToType( + np.array([[-1.6026098728179932], [2.0]]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([[3.0], 
[3.715679168701172]]), var1.eval()) + + def testSparseRepeatedIndices(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + repeated_index_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + aggregated_update_var = variables.Variable([[1.0], [2.0]], dtype=dtype) + grad_repeated_index = ops.IndexedSlices( + constant_op.constant([0.1, 0.1], shape=[2, 1], dtype=dtype), + constant_op.constant([1, 1]), constant_op.constant([2, 1])) + grad_aggregated = ops.IndexedSlices( + constant_op.constant([0.2], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), constant_op.constant([2, 1])) + repeated_update = reg_adagrad_optimizer.RegAdagradOptimizer( + 3.0).apply_gradients([(grad_repeated_index, + repeated_index_update_var)]) + aggregated_update = reg_adagrad_optimizer.RegAdagradOptimizer( + 3.0).apply_gradients([(grad_aggregated, aggregated_update_var)]) + variables.global_variables_initializer().run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + + def testSparseRepeatedIndicesResourceVariable(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var_repeated = resource_variable_ops.ResourceVariable( + [1.0, 2.0], dtype=dtype) + loss_repeated = math_ops.reduce_sum( + embedding_ops.embedding_lookup(var_repeated, [0, 0])) + var_aggregated = resource_variable_ops.ResourceVariable( + [1.0, 2.0], dtype=dtype) + loss_aggregated = 2 * math_ops.reduce_sum( + embedding_ops.embedding_lookup(var_aggregated, [0])) + update_op_repeated = reg_adagrad_optimizer.RegAdagradOptimizer( + 2.0).minimize(loss_repeated) + update_op_aggregated = reg_adagrad_optimizer.RegAdagradOptimizer( + 2.0).minimize(loss_aggregated) + variables.global_variables_initializer().run() + 
self.assertAllCloseAccordingToType(var_repeated.eval(), + var_aggregated.eval()) + for _ in range(3): + update_op_repeated.run() + update_op_aggregated.run() + self.assertAllCloseAccordingToType(var_repeated.eval(), + var_aggregated.eval()) + + def testSparseStability(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + shape = [1, 6] + var0 = variables.Variable( + [[ + 0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257, + -0.0105945 + ]], + dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant( + [[ + -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05, + -8.4877e-05, -9.48906e-05 + ]], + shape=shape, + dtype=dtype), constant_op.constant([0]), + constant_op.constant(shape)) + ada_opt = reg_adagrad_optimizer.RegAdagradOptimizer( + 1.0, initial_accumulator_value=0.1) + ada_update = ada_opt.apply_gradients(zip([grads0], [var0])) + self.assertEqual(["accumulator"], ada_opt.get_slot_names()) + slot0 = ada_opt.get_slot(var0, "accumulator") + init = variables.global_variables_initializer() + for _ in range(100): + init.run() + ada_update.run() + self.assertAllCloseAccordingToType( + np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), slot0.eval()) + self.assertAllCloseAccordingToType( + np.array([[ + 0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573, + -0.01029443 + ]]), var0.eval()) + + def testSharing(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + ada_opt = reg_adagrad_optimizer.RegAdagradOptimizer(3.0) + # Apply the optimizer twice. Both applications will use + # the same accums. 
+ ada_update1 = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + ada_update2 = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.assertEqual(["accumulator"], ada_opt.get_slot_names()) + slot0 = ada_opt.get_slot(var0, "accumulator") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + slot1 = ada_opt.get_slot(var1, "accumulator") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values. + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Mix the first and the second adagrad for 3 steps. + ada_update1.run() + ada_update2.run() + ada_update1.run() + # Validate updated params (the same as with only 1 RegAdagrad). + self.assertAllCloseAccordingToType( + np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([2.715679168701172, 3.715679168701172]), var1.eval()) + + def testDynamicShapeVariable_Ok(self): + with self.test_session(): + v = variable_scope.get_variable( + "v", initializer=constant_op.constant(1.), validate_shape=False) + self.assertFalse(v.shape.is_fully_defined()) + # Creating optimizer should cause no exception. + reg_adagrad_optimizer.RegAdagradOptimizer( + 3.0, initial_accumulator_value=0.1) + + def testSkipUpdatingSlots(self): + iav = 0.130005 # A value that works with float16 + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + ada_opt = reg_adagrad_optimizer.RegAdagradOptimizer( + 3.0, initial_accumulator_value=iav) + # Apply the optimizer twice. Both applications will use + # the same accums. 
+ with ada_opt.avoid_updating_slots(): + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.assertEqual(["accumulator"], ada_opt.get_slot_names()) + slot0 = ada_opt.get_slot(var0, "accumulator") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + slot1 = ada_opt.get_slot(var1, "accumulator") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values. + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Mix the first and the second adagrad for 3 steps. + for _ in range(3): + ada_update.run() + # Validate that ada_opt's slots are not updated. + self.assertAllCloseAccordingToType(np.array([iav, iav]), slot0.eval()) + self.assertAllCloseAccordingToType(np.array([iav, iav]), slot1.eval()) + + def testSparseSkipUpdatingSlots(self): + iav = 0.130005 # A value that works with float16 + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) + var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant([0.1], shape=[1, 1], dtype=dtype), + constant_op.constant([0]), constant_op.constant([2, 1])) + grads1 = ops.IndexedSlices( + constant_op.constant([0.01], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), constant_op.constant([2, 1])) + ada_opt = reg_adagrad_optimizer.RegAdagradOptimizer( + 3.0, initial_accumulator_value=iav) + with ada_opt.avoid_updating_slots(): + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + slot0 = ada_opt.get_slot(var0, "accumulator") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + slot1 = ada_opt.get_slot(var1, "accumulator") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + + variables.global_variables_initializer().run() + # Fetch params to validate initial values + 
self.assertAllClose([[1.0], [2.0]], var0.eval()) + self.assertAllClose([[3.0], [4.0]], var1.eval()) + # Run 3 step of sgd + for _ in range(3): + ada_update.run() + # Validate that ada_opt's slots are not updated. + self.assertAllCloseAccordingToType( + np.array([[iav], [iav]]), slot0.eval()) + self.assertAllCloseAccordingToType( + np.array([[iav], [iav]]), slot1.eval()) + + +if __name__ == "__main__": + test.main() -- GitLab From e36ebcc88f0831c9fc16d0f5b060d076af8c0849 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Tue, 24 Apr 2018 13:58:37 -0700 Subject: [PATCH 224/434] Revert #18251 due to the following issue: - calling convolution with args instead of kwargs from convolutionXd breaks when called within arg_scope. - intentional use cases trigger the added dimension error. PiperOrigin-RevId: 194140820 --- .../contrib/layers/python/layers/layers.py | 142 +----------------- .../layers/python/layers/layers_test.py | 15 +- 2 files changed, 7 insertions(+), 150 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 2f3e57653c..25c3b1e7ea 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -932,8 +932,7 @@ def convolution(inputs, variables_collections=None, outputs_collections=None, trainable=True, - scope=None, - conv_dims=None): + scope=None): """Adds an N-D convolution followed by an optional batch_norm layer. It is required that 1 <= N <= 3. @@ -994,10 +993,6 @@ def convolution(inputs, trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_scope`. - conv_dims: Optional convolution dimensionality, when set it would use the - corresponding convolution (e.g. 2 for Conv 2D, 3 for Conv 3D, ..). When - leaved to None it would select the convolution dimensionality based on - the input rank (i.e. Conv ND, with N = input_rank - 2). 
Returns: A tensor representing the output of the operation. @@ -1020,9 +1015,6 @@ def convolution(inputs, inputs = ops.convert_to_tensor(inputs) input_rank = inputs.get_shape().ndims - if conv_dims is not None and conv_dims + 2 != input_rank: - raise ValueError('Convolution expects input with rank %d, got %d' % - (conv_dims + 2, input_rank)) if input_rank == 3: layer_class = convolutional_layers.Convolution1D elif input_rank == 4: @@ -1069,134 +1061,10 @@ def convolution(inputs, outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs) -@add_arg_scope -def convolution1d(inputs, - num_outputs, - kernel_size, - stride=1, - padding='SAME', - data_format=None, - rate=1, - activation_fn=nn.relu, - normalizer_fn=None, - normalizer_params=None, - weights_initializer=initializers.xavier_initializer(), - weights_regularizer=None, - biases_initializer=init_ops.zeros_initializer(), - biases_regularizer=None, - reuse=None, - variables_collections=None, - outputs_collections=None, - trainable=True, - scope=None): - return convolution(inputs, - num_outputs, - kernel_size, - stride, - padding, - data_format, - rate, - activation_fn, - normalizer_fn, - normalizer_params, - weights_initializer, - weights_regularizer, - biases_initializer, - biases_regularizer, - reuse, - variables_collections, - outputs_collections, - trainable, - scope, - conv_dims=1) - -convolution1d.__doc__ = convolution.__doc__ -@add_arg_scope -def convolution2d(inputs, - num_outputs, - kernel_size, - stride=1, - padding='SAME', - data_format=None, - rate=1, - activation_fn=nn.relu, - normalizer_fn=None, - normalizer_params=None, - weights_initializer=initializers.xavier_initializer(), - weights_regularizer=None, - biases_initializer=init_ops.zeros_initializer(), - biases_regularizer=None, - reuse=None, - variables_collections=None, - outputs_collections=None, - trainable=True, - scope=None): - return convolution(inputs, - num_outputs, - kernel_size, - stride, - 
padding, - data_format, - rate, - activation_fn, - normalizer_fn, - normalizer_params, - weights_initializer, - weights_regularizer, - biases_initializer, - biases_regularizer, - reuse, - variables_collections, - outputs_collections, - trainable, - scope, - conv_dims=2) - -convolution2d.__doc__ = convolution.__doc__ +convolution2d = convolution +convolution3d = convolution -@add_arg_scope -def convolution3d(inputs, - num_outputs, - kernel_size, - stride=1, - padding='SAME', - data_format=None, - rate=1, - activation_fn=nn.relu, - normalizer_fn=None, - normalizer_params=None, - weights_initializer=initializers.xavier_initializer(), - weights_regularizer=None, - biases_initializer=init_ops.zeros_initializer(), - biases_regularizer=None, - reuse=None, - variables_collections=None, - outputs_collections=None, - trainable=True, - scope=None): - return convolution(inputs, - num_outputs, - kernel_size, - stride, - padding, - data_format, - rate, - activation_fn, - normalizer_fn, - normalizer_params, - weights_initializer, - weights_regularizer, - biases_initializer, - biases_regularizer, - reuse, - variables_collections, - outputs_collections, - trainable, - scope, - conv_dims=3) - -convolution3d.__doc__ = convolution.__doc__ @add_arg_scope def convolution2d_in_plane( @@ -1543,7 +1411,7 @@ def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None): Args: tensor: An `int` `Tensor` to be converted to a `Sparse`. eos_token: An integer. - It is part of the target label that signifies the end of a sentence. + It is part of the target label that signfies the end of a sentence. outputs_collections: Collection to add the outputs. scope: Optional scope for name_scope. """ @@ -1687,7 +1555,7 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None): output_collections: Collection to which the outputs will be added. scope: Optional scope for `name_scope`. 
Returns: - A `Tensor` or `SparseTensor` containing the same values as `inputs`, but + A `Tensor` or `SparseTensor` conataining the same values as `inputs`, but with innermost dimensions flattened to obtain rank `new_rank`. Raises: diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index b01fd5d5c9..997f910a2a 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -310,17 +310,6 @@ class BiasAddTest(test.TestCase): class ConvolutionTest(test.TestCase): - def testInvalidShape(self): - with self.test_session(): - images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1) - with self.assertRaisesRegexp( - ValueError, 'Convolution expects input with rank 5, got 4'): - layers_lib.convolution3d(images_2d, 32, 3) - images_3d = random_ops.random_uniform((5, 6, 7, 9, 3), seed=1) - with self.assertRaisesRegexp( - ValueError, 'Convolution expects input with rank 4, got 5'): - layers_lib.convolution2d(images_3d, 32, 3) - def testInvalidDataFormat(self): height, width = 7, 9 with self.test_session(): @@ -3166,7 +3155,7 @@ class RepeatTests(test.TestCase): with self.test_session(): images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32) output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3]) - self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu') + self.assertEqual(output.op.name, 'Repeat/convolution_3/Relu') self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 32]) def testRepeatWithScope(self): @@ -3760,7 +3749,7 @@ class StackTests(test.TestCase): layers_lib.convolution2d, [10, 20, 30], kernel_size=[3, 3], padding='SAME') - self.assertEqual(output.op.name, 'Stack/convolution2d_3/Relu') + self.assertEqual(output.op.name, 'Stack/convolution_3/Relu') self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 30]) def testStackWithScope(self): -- GitLab From 
b7bf05ade772a21bc9b74aa290a4493955ff2a1f Mon Sep 17 00:00:00 2001 From: ctiijima Date: Tue, 24 Apr 2018 14:17:14 -0700 Subject: [PATCH 225/434] typo fixes --- tensorflow/docs_src/get_started/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md index b28cb9df75..578080bb59 100644 --- a/tensorflow/docs_src/get_started/index.md +++ b/tensorflow/docs_src/get_started/index.md @@ -10,13 +10,13 @@ course prior to diving into TensorFlow documentation: TensorFlow is a tool for machine learning. While it contains a wide range of functionality, TensorFlow is mainly designed for deep neural network models. -The easiest way to get started with tensorflow is using Eager Execution. +The easiest way to get started with TensorFlow is by using Eager Execution. - * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. + * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. TensorFlow provides many APIs. The remainder of this section focuses on the Estimator API which provide scalable, high-performance models. -To get started with Estimators begin by reading one of the following documents: +To get started with Estimators, begin by reading one of the following documents: * @{$get_started/get_started_for_beginners}, which is aimed at readers new to machine learning. 
-- GitLab From 7d1fe156d79cad6818a443d3e9473dd6abd4ab56 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 24 Apr 2018 14:26:21 -0700 Subject: [PATCH 226/434] shape_tuple in array_ops.stack PiperOrigin-RevId: 194145557 --- tensorflow/python/ops/array_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index ceeabe090d..aba8beb3f4 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -935,9 +935,9 @@ def stack(values, axis=0, name="stack"): except (TypeError, ValueError): pass # Input list contains non-constant tensors - value_shape = ops.convert_to_tensor(values[0], name=name).get_shape() - if value_shape.ndims is not None: - expanded_num_dims = value_shape.ndims + 1 + value_shape = ops.convert_to_tensor(values[0], name=name)._shape_tuple() # pylint: disable=protected-access + if value_shape is not None: + expanded_num_dims = len(value_shape) + 1 if axis < -expanded_num_dims or axis >= expanded_num_dims: raise ValueError("axis = %d not in [%d, %d)" % (axis, -expanded_num_dims, expanded_num_dims)) -- GitLab From 1c9493f1b6aa56653b018ecf25af7040317fbb1b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 24 Apr 2018 14:32:39 -0700 Subject: [PATCH 227/434] Run shape inference directly on the graphdef instead of building an intermediate graph. 
PiperOrigin-RevId: 194146713 --- tensorflow/core/grappler/costs/BUILD | 2 + .../core/grappler/costs/graph_properties.cc | 554 +++++++++--------- .../core/grappler/costs/graph_properties.h | 26 +- .../grappler/costs/graph_properties_test.cc | 6 + tensorflow/core/grappler/graph_view.cc | 49 ++ tensorflow/core/grappler/graph_view.h | 36 +- 6 files changed, 373 insertions(+), 300 deletions(-) diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index ddbf7f3697..35f11eac29 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -42,6 +42,8 @@ cc_library( deps = [ ":utils", "//tensorflow/core/grappler/utils:topological_sort", + "//tensorflow/core/grappler:graph_view", + "//tensorflow/core/grappler:op_types", "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", "//tensorflow/core:lib", diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index ca30ad83a0..e3c6c40306 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -19,10 +19,13 @@ limitations under the License. 
#include #include #include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/grappler/costs/utils.h" +#include "tensorflow/core/grappler/graph_view.h" +#include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -253,16 +256,16 @@ typename DisjointSet::Rep* DisjointSet::Find(Handle value) { return root; } -bool IsQueue(const Node& node) { - return str_util::EndsWith(node.type_string(), "QueueV2"); +bool IsQueue(const NodeDef& node) { + return str_util::EndsWith(node.op(), "QueueV2"); } // Returns true if the node is an Enter op AND its input is a Queue. -bool IsEnterWithQueue(const Node& node) { - if (node.IsEnter()) { - const Node* in_node; - TF_CHECK_OK(node.input_node(0, &in_node)); - return IsQueue(*in_node); +bool IsEnterWithQueue(const NodeDef& node, const GraphView& graph) { + if (IsEnter(node)) { + GraphView::InputPort input(&node, 0); + GraphView::OutputPort fanin = graph.GetRegularFanin(input); + return IsQueue(*fanin.node); } return false; } @@ -279,8 +282,9 @@ bool HasAnyUnknownDimensions(const TensorShapeProto& proto) { return false; } +// This really should be done in an external debugging tool void VerboseLogUnknownDimensionSources( - const Graph& graph, + const GraphDef& graph, const std::map>& input_properties_map, const std::map>& @@ -295,17 +299,13 @@ void VerboseLogUnknownDimensionSources( // do not have any unknown dimensions in their inputs, but // we have some unknown dimensions in their outputs. 
std::map op_to_count; - for (const Node* const node : graph.nodes()) { - if (node->num_outputs() == 0) { - continue; - } - - const auto& input_properties = input_properties_map.at(node->name()); - const auto& output_properties = output_properties_map.at(node->name()); + for (const NodeDef& node : graph.node()) { + const auto& input_properties = input_properties_map.at(node.name()); + const auto& output_properties = output_properties_map.at(node.name()); bool has_unknown_inputs = false; - for (int i = 0; i < node->num_inputs(); ++i) { - if (HasAnyUnknownDimensions(input_properties[i].shape())) { + for (const auto& input_prop : input_properties) { + if (HasAnyUnknownDimensions(input_prop.shape())) { has_unknown_inputs = true; break; } @@ -315,26 +315,24 @@ void VerboseLogUnknownDimensionSources( continue; } - for (int i = 0; i < node->num_outputs(); ++i) { - if (HasAnyUnknownDimensions(output_properties[i].shape())) { + for (const auto& output_prop : output_properties) { + if (HasAnyUnknownDimensions(output_prop.shape())) { string inputs = "input_shapes=["; - for (int i = 0; i < node->num_inputs(); ++i) { - inputs += - PartialTensorShape::DebugString(input_properties[i].shape()); + for (const auto& input_prop : input_properties) { + inputs += PartialTensorShape::DebugString(input_prop.shape()); } inputs += "]"; string outputs = "output_shapes=["; - for (int i = 0; i < node->num_outputs(); ++i) { - outputs += - PartialTensorShape::DebugString(output_properties[i].shape()); + for (const auto& output_prop : output_properties) { + outputs += PartialTensorShape::DebugString(output_prop.shape()); } outputs += "]"; - VLOG(2) << "Node: " << node->name() << ", Op: " << node->def().op() - << ", " << inputs << ", " << outputs; + VLOG(2) << "Node: " << node.name() << ", Op: " << node.op() << ", " + << inputs << ", " << outputs; - op_to_count[node->def().op()]++; + op_to_count[node.op()]++; // don't log again for this node break; @@ -357,13 +355,13 @@ void 
VerboseLogUnknownDimensionSources( // information is refined. class TopoQueue { public: - explicit TopoQueue(const std::unordered_map& topo_order) + explicit TopoQueue(const std::unordered_map& topo_order) : queue_(CompareNodes(topo_order)) {} - void push(const Node* n) { queue_.insert(n); } - const Node* pop() { + void push(const NodeDef* n) { queue_.insert(n); } + const NodeDef* pop() { CHECK(!empty()); auto it = queue_.begin(); - const Node* n = *it; + const NodeDef* n = *it; queue_.erase(it); return n; } @@ -376,16 +374,16 @@ class TopoQueue { // use their id to ensure they're sorted topologically. struct CompareNodes { explicit CompareNodes( - const std::unordered_map& topo_ordering) + const std::unordered_map& topo_ordering) : topo_order(topo_ordering) {} - bool operator()(const Node* lhs, const Node* rhs) const { + bool operator()(const NodeDef* lhs, const NodeDef* rhs) const { return topo_order.at(lhs) < topo_order.at(rhs); } private: - const std::unordered_map& topo_order; + const std::unordered_map& topo_order; }; - std::set queue_; + std::set queue_; }; // Merge and relax symbolic shapes. 
@@ -396,22 +394,41 @@ class TopoQueue { class SymbolicShapeRefiner { public: explicit SymbolicShapeRefiner( - const GraphDef& graph, + const GraphView& graph, const std::unordered_map>& fed_ports) - : function_library_(OpRegistry::Global(), graph.library()), + : graph_(graph), + function_library_(OpRegistry::Global(), graph.GetGraph()->library()), fed_ports_(fed_ports) { - graph_def_version_ = graph.versions().producer(); - node_to_context_.reserve(graph.node_size()); + graph_def_version_ = graph.GetGraph()->versions().producer(); + node_to_context_.reserve(graph.GetGraph()->node_size()); + } + + const GraphView& graph() const { return graph_; } + + struct NodeContext { + const OpRegistrationData* op_data; + DataTypeVector input_types; + DataTypeVector output_types; + std::unique_ptr inference_context; + std::vector output_tensors_as_shapes; + }; + + NodeContext* GetNodeContext(const NodeDef* node) { + auto it = node_to_context_.find(node); + if (it == node_to_context_.end()) { + return nullptr; + } + return &it->second; } - InferenceContext* GetContext(const Node* node) { + InferenceContext* GetContext(const NodeDef* node) { auto it = node_to_context_.find(node); if (it == node_to_context_.end()) { return nullptr; } return it->second.inference_context.get(); } - Status UpdateNode(const Node* node, bool relax, bool* refined) { + Status UpdateNode(const NodeDef* node, bool relax, bool* refined) { NodeContext* node_context = GetNodeContext(node); if (node_context == nullptr) { TF_RETURN_IF_ERROR(AddNode(node)); @@ -421,82 +438,84 @@ class SymbolicShapeRefiner { // Check if the shapes of the nodes in the fan-in of this node have changed, // and if they have, update the node input shapes. 
InferenceContext* inference_context = node_context->inference_context.get(); - std::vector const_values(node->num_inputs()); - std::vector input_tensors(node->num_inputs(), nullptr); - std::vector input_tensors_as_shapes(node->num_inputs()); - - for (const Edge* e : node->in_edges()) { - if (e->IsControlEdge()) continue; - - int dst_input = e->dst_input(); - int src_output = e->src_output(); - - Node* input = e->src(); - NodeContext* c = GetNodeContext(input); - if (c == nullptr) { - return errors::FailedPrecondition( - "Input ", dst_input, " ('", input->name(), "') for '", node->name(), - "' was not previously added to ShapeRefiner."); - } + std::vector const_values(inference_context->num_inputs()); + std::vector input_tensors(inference_context->num_inputs(), + nullptr); + std::vector input_tensors_as_shapes( + inference_context->num_inputs()); + + for (int dst_input = 0; dst_input < inference_context->num_inputs(); + ++dst_input) { + GraphView::InputPort port(node, dst_input); + for (const GraphView::OutputPort fanin : graph_.GetFanin(port)) { + int src_output = fanin.port_id; + const NodeDef* input = fanin.node; + NodeContext* c = GetNodeContext(input); + if (c == nullptr) { + return errors::FailedPrecondition( + "Input ", dst_input, " ('", input->name(), "') for '", + node->name(), "' was not previously added to ShapeRefiner."); + } - if (input->IsConstant()) { - // Convert constant value into tensors. - if (const_values[dst_input].FromProto( - input->def().attr().at("value").tensor())) { - input_tensors[dst_input] = &const_values[dst_input]; - // Integer tensors of rank one can also be interpreted as a shape - // provided all their values are >= -1. 
- if (const_values[dst_input].dims() == 1 && - (const_values[dst_input].dtype() == DT_INT32 || - const_values[dst_input].dtype() == DT_INT64)) { - ShapeHandle tensor_shape = inference_context->Vector( - const_values[dst_input].NumElements()); - ShapeHandle shp; - if (inference_context - ->MakeShapeFromTensor(input_tensors[dst_input], - tensor_shape, &shp) - .ok()) { - input_tensors_as_shapes[dst_input] = shp; + if (IsConstant(*input)) { + // Convert constant value into tensors. + if (const_values[dst_input].FromProto( + input->attr().at("value").tensor())) { + input_tensors[dst_input] = &const_values[dst_input]; + // Integer tensors of rank one can also be interpreted as a shape + // provided all their values are >= -1. + if (const_values[dst_input].dims() == 1 && + (const_values[dst_input].dtype() == DT_INT32 || + const_values[dst_input].dtype() == DT_INT64)) { + ShapeHandle tensor_shape = inference_context->Vector( + const_values[dst_input].NumElements()); + ShapeHandle shp; + if (inference_context + ->MakeShapeFromTensor(input_tensors[dst_input], + tensor_shape, &shp) + .ok()) { + input_tensors_as_shapes[dst_input] = shp; + } } } } - } - if (c->output_tensors_as_shapes.size() > src_output) { - input_tensors_as_shapes[dst_input] = - c->output_tensors_as_shapes[src_output]; - } - - DCHECK_GE(dst_input, 0); - if (!*refined && !inference_context->input(dst_input).SameHandle( - c->inference_context->output(src_output))) { - *refined = true; - } - inference_context->SetInput(dst_input, - c->inference_context->output(src_output)); - - if (!*refined && - inference_context->requested_input_tensor_as_partial_shape( - dst_input)) { - // The input value may have changed. Since we have no way to know if - // that's indeed the case, err on the safe side. - *refined = true; - } - - // Also propagate handle shape and dtype of edges which are carrying - // resource handles. 
- if (e->src()->output_type(src_output) == DT_RESOURCE) { - auto* outputs = - c->inference_context->output_handle_shapes_and_types(src_output); - if (!outputs) continue; - auto* inputs = - inference_context->input_handle_shapes_and_types(dst_input); + if (c->output_tensors_as_shapes.size() > src_output) { + input_tensors_as_shapes[dst_input] = + c->output_tensors_as_shapes[src_output]; + } - if (!inputs || !EquivalentShapesAndTypes(*outputs, *inputs)) { + DCHECK_GE(dst_input, 0); + if (!*refined && !inference_context->input(dst_input).SameHandle( + c->inference_context->output(src_output))) { + *refined = true; + } + inference_context->SetInput(dst_input, + c->inference_context->output(src_output)); + + if (!*refined && + inference_context->requested_input_tensor_as_partial_shape( + dst_input)) { + // The input value may have changed. Since we have no way to know if + // that's indeed the case, err on the safe side. *refined = true; } - inference_context->set_input_handle_shapes_and_types(dst_input, - *outputs); + + // Also propagate handle shape and dtype of edges which are carrying + // resource handles. + if (node_context->input_types[dst_input] == DT_RESOURCE) { + auto* outputs = + c->inference_context->output_handle_shapes_and_types(src_output); + if (!outputs) continue; + auto* inputs = + inference_context->input_handle_shapes_and_types(dst_input); + + if (!inputs || !EquivalentShapesAndTypes(*outputs, *inputs)) { + *refined = true; + } + inference_context->set_input_handle_shapes_and_types(dst_input, + *outputs); + } } } @@ -510,10 +529,10 @@ class SymbolicShapeRefiner { input_tensors_as_shapes); // Update the shapes of the outputs. 
- return InferShapes(node, node_context); + return InferShapes(*node, node_context); } - Status SetUnknownShape(const Node* node, int output_port) { + Status SetUnknownShape(const NodeDef* node, int output_port) { shape_inference::ShapeHandle shape = GetUnknownOutputShape(node, output_port); InferenceContext* ctx = GetContext(node); @@ -525,7 +544,7 @@ class SymbolicShapeRefiner { } struct ShapeId { - const Node* node; + const NodeDef* node; int port_id; bool operator==(const ShapeId& other) const { return node == other.node && port_id == other.port_id; @@ -533,12 +552,12 @@ class SymbolicShapeRefiner { }; struct HashShapeId { std::size_t operator()(const ShapeId& shp) const { - return std::hash{}(shp.node) + shp.port_id; + return std::hash{}(shp.node) + shp.port_id; } }; struct DimId { - const Node* node; + const NodeDef* node; int port_id; int dim_index; bool operator==(const DimId& other) const { @@ -549,13 +568,14 @@ class SymbolicShapeRefiner { struct HashDimId { std::size_t operator()(const DimId& dim) const { - return std::hash{}(dim.node) + dim.port_id + dim.dim_index; + return std::hash{}(dim.node) + dim.port_id + + dim.dim_index; } }; // Compute the shape of the tensors outputed by node 'node' at output port // 'port_index' as the intersection of shape1 and shape2. - ShapeHandle OutputAsIntersection(const Node* node, int port_index, + ShapeHandle OutputAsIntersection(const NodeDef* node, int port_index, ShapeHandle shape1, ShapeHandle shape2) { if (shape1.SameHandle(shape2)) { return shape1; @@ -600,7 +620,7 @@ class SymbolicShapeRefiner { // Compute the shape of the tensors outputed by node 'node' at output port // 'port_index' as the union of shape1 and shape2. 
- ShapeHandle OutputAsUnion(const Node* node, int port_index, + ShapeHandle OutputAsUnion(const NodeDef* node, int port_index, ShapeHandle shape1, ShapeHandle shape2) { if (shape1.SameHandle(shape2)) { return shape1; @@ -670,20 +690,24 @@ class SymbolicShapeRefiner { return true; } - Status AddNode(const Node* node) { + Status AddNode(const NodeDef* node) { + NodeContext& node_ctx = node_to_context_[node]; + TF_RETURN_IF_ERROR(function_library_.LookUp(node->op(), &node_ctx.op_data)); + + TF_RETURN_IF_ERROR(InOutTypesForNode(*node, node_ctx.op_data->op_def, + &node_ctx.input_types, + &node_ctx.output_types)); + // Create the inference context for this node. - std::vector input_shapes(node->num_inputs()); + const int num_inputs = node_ctx.input_types.size(); + std::vector input_shapes(num_inputs); std::vector>> - input_handle_shapes_and_types(node->num_inputs()); - std::vector input_tensors(node->num_inputs(), nullptr); + input_handle_shapes_and_types(num_inputs); + std::vector input_tensors(num_inputs, nullptr); std::vector input_tensors_as_shapes; - NodeContext& node_ctx = node_to_context_[node]; - TF_RETURN_IF_ERROR( - function_library_.LookUp(node->type_string(), &node_ctx.op_data)); - node_ctx.inference_context.reset(new InferenceContext( - graph_def_version_, &node->def(), node->op_def(), input_shapes, + graph_def_version_, node, node_ctx.op_data->op_def, input_shapes, input_tensors, input_tensors_as_shapes, std::move(input_handle_shapes_and_types))); const Status s = node_ctx.inference_context->construction_status(); @@ -696,7 +720,7 @@ class SymbolicShapeRefiner { private: // Return the one ShapeHandle used to denote a fully unknown shape for a node // output. 
- ShapeHandle GetUnknownOutputShape(const Node* node, int index) { + ShapeHandle GetUnknownOutputShape(const NodeDef* node, int index) { ShapeId id{node, index}; auto it = unknown_shapes_.find(id); if (it != unknown_shapes_.end()) { @@ -709,7 +733,8 @@ class SymbolicShapeRefiner { } // Return the one ShapeHandle used to denote a fully unknown dimension for a // node output. - DimensionHandle GetUnknownOutputDim(const Node* node, int index, int dim_id) { + DimensionHandle GetUnknownOutputDim(const NodeDef* node, int index, + int dim_id) { DimId id{node, index, dim_id}; auto it = unknown_dims_.find(id); if (it != unknown_dims_.end()) { @@ -721,31 +746,25 @@ class SymbolicShapeRefiner { return dim; } - struct NodeContext { - const OpRegistrationData* op_data; - std::unique_ptr inference_context; - std::vector output_tensors_as_shapes; - }; - - Status InferShapes(const Node* node, NodeContext* c) { + Status InferShapes(const NodeDef& node, NodeContext* c) { InferenceContext* ic = c->inference_context.get(); - auto it = fed_ports_.find(node->name()); + auto it = fed_ports_.find(node.name()); const bool is_fed = it != fed_ports_.end(); // Propagate shape tensors unless the node is fed. // TODO(bsteiner) We should still propagate the shapes to the ports that // aren't fed in the case of a ShapeN node. 
if (!is_fed) { - if (node->type_string() == "Shape") { + if (IsShape(node)) { c->output_tensors_as_shapes.resize(1); c->output_tensors_as_shapes[0] = c->inference_context->input(0); - } else if (node->type_string() == "ShapeN") { + } else if (IsShapeN(node)) { c->output_tensors_as_shapes.resize(c->inference_context->num_inputs()); for (int i = 0; i < c->inference_context->num_inputs(); ++i) { c->output_tensors_as_shapes[i] = c->inference_context->input(i); } - } else if (node->type_string() == "ConcatV2") { + } else if (node.op() == "ConcatV2") { bool valid = true; ShapeHandle result; for (int i = 0; i < ic->num_inputs() - 1; ++i) { @@ -763,7 +782,7 @@ class SymbolicShapeRefiner { c->output_tensors_as_shapes.resize(1); c->output_tensors_as_shapes[0] = result; } - } else if (node->type_string() == "Slice") { + } else if (IsSlice(node)) { ShapeHandle input = ic->input_tensors_as_shapes()[0]; bool valid = ic->RankKnown(input); const Tensor* slice_offset = ic->input_tensor(1); @@ -800,22 +819,16 @@ class SymbolicShapeRefiner { // It is possible to feed node output ports with tensors of any shape: as // a result, the shape of a fed port is completely unknown. 
for (const int output_port : it->second) { - status.Update(SetUnknownShape(node, output_port)); + status.Update(SetUnknownShape(&node, output_port)); } } return status; } - NodeContext* GetNodeContext(const Node* node) { - auto it = node_to_context_.find(node); - if (it == node_to_context_.end()) { - return nullptr; - } - return &it->second; - } - + private: + const GraphView& graph_; int graph_def_version_; - std::unordered_map node_to_context_; + std::unordered_map node_to_context_; std::unordered_map unknown_shapes_; std::unordered_map unknown_dims_; FunctionLibraryDefinition function_library_; @@ -874,7 +887,7 @@ class SymbolicShapeManager { }; Status GraphProperties::MergeEnqueueShapesAndTypes( - SymbolicShapeRefiner* shape_refiner, const Node* qnode, + SymbolicShapeRefiner* shape_refiner, const NodeDef* qnode, const std::vector& shapes_and_types, std::vector* queue_shapes_and_types) { if (shapes_and_types.size() != queue_shapes_and_types->size()) { @@ -897,7 +910,7 @@ Status GraphProperties::MergeEnqueueShapesAndTypes( } Status GraphProperties::RelaxEnqueueShapesAndMergeTypes( - SymbolicShapeRefiner* shape_refiner, const Node* qnode, + SymbolicShapeRefiner* shape_refiner, const NodeDef* qnode, const std::vector& shapes_and_types, std::vector* queue_shapes_and_types) { if (shapes_and_types.size() != queue_shapes_and_types->size()) { @@ -925,7 +938,7 @@ Status GraphProperties::RelaxEnqueueShapesAndMergeTypes( // inputs are UnknownShapes. So we need to ignore the input from NextIteration // nodes to propagate any known shape from the Merge node. 
Status GraphProperties::UpdateMergeNode(SymbolicShapeRefiner* shape_refiner, - const Node* node, bool relax, + const NodeDef* node, bool relax, bool* new_shapes) const { InferenceContext* c = shape_refiner->GetContext(node); if (!c) { @@ -942,25 +955,24 @@ Status GraphProperties::UpdateMergeNode(SymbolicShapeRefiner* shape_refiner, ShapeHandle out; bool out_initialized = false; - for (const Edge* e : node->in_edges()) { - if (e->IsControlEdge()) { - continue; - } + for (const GraphView::Edge fanin : + shape_refiner->graph().GetFaninEdges(*node, false)) { // Skip back edges during the initial propagation phase. This is equivalent // to assuming that all the inputs to the merge nodes are fed by the same // shape, and will be corrected as needed in the relaxation phase. - if (!relax && e->src()->IsNextIteration()) { + if (!relax && IsNextIteration(*fanin.src.node)) { continue; } - InferenceContext* in = shape_refiner->GetContext(e->src()); + InferenceContext* in = shape_refiner->GetContext(fanin.src.node); if (!relax && !in) { // Handling a loop for the first time, the back edge won't have any shape // info. continue; } - ShapeHandle input = in->output(e->src_output()); - c->SetInput(e->dst_input(), input); + ShapeHandle input = in->output(fanin.src.port_id); + CHECK_EQ(fanin.tgt.node, node); + c->SetInput(fanin.tgt.port_id, input); if (!out_initialized) { out_initialized = true; out = input; @@ -984,7 +996,7 @@ Status GraphProperties::UpdateMergeNode(SymbolicShapeRefiner* shape_refiner, // Manually propagate the input shape for Enter nodes and update any Merge node // outputs. 
Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner, - const Node* node, bool relax, + const NodeDef* node, bool relax, bool* new_shapes) { auto enter_ctx = shape_refiner->GetContext(node); if (!enter_ctx) { @@ -992,33 +1004,27 @@ Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner, enter_ctx = shape_refiner->GetContext(node); } - for (const Edge* e : node->in_edges()) { - if (e->IsControlEdge()) { - continue; - } - InferenceContext* in = shape_refiner->GetContext(e->src()); - ShapeHandle input = in->output(e->src_output()); - if (!enter_ctx->output(0).SameHandle(input)) { - if (relax) { - enter_ctx->RelaxInput(0, input); - } else { - enter_ctx->MergeInput(0, input); - } - enter_ctx->set_output(0, input); - *new_shapes = true; - } + GraphView::InputPort inp(node, 0); + GraphView::OutputPort fanin = shape_refiner->graph().GetRegularFanin(inp); + + InferenceContext* in = shape_refiner->GetContext(fanin.node); + ShapeHandle input = in->output(fanin.port_id); + if (!enter_ctx->output(0).SameHandle(input)) { + enter_ctx->SetInput(0, input); + enter_ctx->set_output(0, input); + *new_shapes = true; } return Status::OK(); } -Status GraphProperties::UpdateShapes( - SymbolicShapeRefiner* shape_refiner, bool relax, - const Node* n, bool* new_shapes) const { - if (n->IsEnter()) { +Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner, + bool relax, const NodeDef* n, + bool* new_shapes) const { + if (IsEnter(*n)) { // The Enter shape function always forwards an UnknownShape, so do the right // thing here. TF_RETURN_IF_ERROR(UpdateEnter(shape_refiner, n, relax, new_shapes)); - } else if (n->IsMerge()) { + } else if (IsMerge(*n)) { // Properly handle merge nodes. 
TF_RETURN_IF_ERROR(UpdateMergeNode(shape_refiner, n, relax, new_shapes)); } else { @@ -1028,7 +1034,7 @@ Status GraphProperties::UpdateShapes( if (updated) { // We want to avoid propagating through loops on the merge pass because // the shapes are not guaranteed to converge. - if (relax || !n->IsNextIteration()) { + if (relax || !IsNextIteration(*n)) { *new_shapes = true; } } @@ -1039,8 +1045,8 @@ Status GraphProperties::UpdateShapes( // Propagates the shapes in the transitive fan-out of . Status GraphProperties::PropagateShapes( SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, - const std::unordered_map>& - resources, + const std::unordered_map>& resources, int num_loops) const { // Limit the number of iterations to prevent infinite loops in the presence of // incorrect shape functions. The algoritm should converge in at most @@ -1062,15 +1068,13 @@ Status GraphProperties::PropagateShapes( int64 num_loop_iterations = 0; while (!new_shapes->empty() && num_loop_iterations++ < max_loop_iterations) { - const Node* n = new_shapes->pop(); + const NodeDef* n = new_shapes->pop(); bool updated = false; TF_RETURN_IF_ERROR(UpdateShapes(shape_refiner, relax, n, &updated)); if (updated) { - for (const Edge* e : n->out_edges()) { - if (!e->IsControlEdge()) { - const Node* fanout = e->dst(); - new_shapes->push(fanout); - } + for (const GraphView::InputPort fanout : + shape_refiner->graph().GetFanouts(*n, false)) { + new_shapes->push(fanout.node); } } } @@ -1093,10 +1097,11 @@ Status GraphProperties::PropagateShapes( } Status GraphProperties::UpdateResource( - const Node* qnode, const std::unordered_set& queue_inputs, + const NodeDef* qnode, + const std::unordered_set& queue_inputs, SymbolicShapeRefiner* shape_refiner, TopoQueue* new_shapes) { // Proceed only if qnode is a queue or an Enter with queue input. 
- if (!IsQueue(*qnode) && !IsEnterWithQueue(*qnode)) { + if (!IsQueue(*qnode) && !IsEnterWithQueue(*qnode, shape_refiner->graph())) { return Status::OK(); } auto qctx = shape_refiner->GetContext(qnode); @@ -1109,16 +1114,17 @@ Status GraphProperties::UpdateResource( // are in. std::vector queue_shapes_and_types; for (const auto& node : queue_inputs) { - auto ctx = shape_refiner->GetContext(node); + auto ctx = shape_refiner->GetNodeContext(node); if (!ctx) { continue; } // TODO(bsteiner): handle EnqueueMany as well. - if (node->type_string().find("Enqueue") != std::string::npos && - node->type_string().find("EnqueueMany") == std::string::npos) { + if (node->op().find("Enqueue") != std::string::npos && + node->op().find("EnqueueMany") == std::string::npos) { std::vector shapes_and_types; - for (int i = 1; i < ctx->num_inputs(); ++i) { - shapes_and_types.push_back({ctx->input(i), node->input_type(i)}); + for (int i = 1; i < ctx->input_types.size(); ++i) { + shapes_and_types.push_back( + {ctx->inference_context->input(i), ctx->input_types[i]}); } if (queue_shapes_and_types.empty()) { queue_shapes_and_types = shapes_and_types; @@ -1134,11 +1140,9 @@ Status GraphProperties::UpdateResource( queue_shapes_and_types)) { qctx->set_output_handle_shapes_and_types(0, queue_shapes_and_types); - for (const Edge* e : qnode->out_edges()) { - if (!e->IsControlEdge()) { - const Node* fanout = e->dst(); - new_shapes->push(fanout); - } + for (const GraphView::InputPort fanout : + shape_refiner->graph().GetFanouts(*qnode, false)) { + new_shapes->push(fanout.node); } } @@ -1148,18 +1152,6 @@ Status GraphProperties::UpdateResource( Status GraphProperties::InferStatically(bool assume_valid_feeds) { FunctionLibraryDefinition function_library(OpRegistry::Global(), item_.graph.library()); - Graph graph(function_library); - graph_ = &graph; - ImportGraphDefOptions options; - // Graph optimization happens at the late stage of graph execution, - // when colocation constraints are already 
validated previously and - // the device placement of nodes has also completed, so there - // is no need to validate colocation constraints again. - options.validate_colocation_constraints = false; - options.validate_shape = false; - Status s = ImportGraphDef(options, item_.graph, &graph, nullptr); - TF_RETURN_IF_ERROR(s); - std::unordered_map> fed_ports; if (!assume_valid_feeds) { for (const auto& feed : item_.feed) { @@ -1172,46 +1164,45 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) { std::unordered_map topo_order; TF_RETURN_IF_ERROR(ComputeTopologicalOrder(item_.graph, &topo_order)); - std::unordered_map order_by_name; - for (const auto topo : topo_order) { - order_by_name[topo.first->name()] = topo.second; - } + GraphView graph_view(&item_.graph); - // List the resources and the nodes using them. Also collect the Enter and - // Merge nodes. - std::unordered_map graph_topo_order; - std::unordered_map> resources; - std::unordered_set merge_nodes; - std::unordered_set fed_nodes; - std::unordered_set primary_inputs; + // List the resources and the nodes using them. Also collect the Merge nodes, + // fed nodes, and primary inputs. 
+ std::unordered_map> + resources; + std::unordered_set merge_nodes; + std::unordered_set fed_nodes; + std::unordered_set primary_inputs; int num_loops = 0; - for (const Node* const node : graph.nodes()) { - auto it = order_by_name.find(node->name()); - if (it == order_by_name.end()) { - continue; - } - graph_topo_order[node] = it->second; - - for (int i = 0; i < node->num_inputs(); ++i) { - if (node->input_type(i) == DataType::DT_RESOURCE) { - const Node* resource; - TF_CHECK_OK(node->input_node(i, &resource)); - resources[resource].insert(node); - } - } - if (node->num_inputs() == 0) { - primary_inputs.insert(node); - } else if (node->IsMerge()) { - merge_nodes.insert(node); - } else if (node->IsNextIteration()) { + for (const NodeDef& node : item_.graph.node()) { + if (NumNonControlInputs(node) == 0) { + primary_inputs.insert(&node); + } else if (IsMerge(node)) { + merge_nodes.insert(&node); + } else if (IsNextIteration(node)) { ++num_loops; + } else { + const OpRegistrationData* op_data; + TF_RETURN_IF_ERROR(function_library.LookUp(node.op(), &op_data)); + DataTypeVector input_types; + DataTypeVector output_types; + TF_RETURN_IF_ERROR(InOutTypesForNode(node, op_data->op_def, &input_types, + &output_types)); + for (int i = 0; i < input_types.size(); ++i) { + if (input_types[i] == DataType::DT_RESOURCE) { + GraphView::InputPort input(&node, i); + const GraphView::OutputPort resource = + graph_view.GetRegularFanin(input); + resources[resource.node].insert(&node); + } + } } - if (fed_ports.find(node->name()) != fed_ports.end()) { - fed_nodes.insert(node); + if (fed_ports.find(node.name()) != fed_ports.end()) { + fed_nodes.insert(&node); } } - SymbolicShapeRefiner refiner(item_.graph, fed_ports); + SymbolicShapeRefiner refiner(graph_view, fed_ports); // We propagate shapes through the graph in two phases. 
In the first phase, we // exclusively merge shapes but we do not propagate shapes through the @@ -1219,19 +1210,19 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) { // we exclusively relax shapes and propagate shapes through loops until // reaching fixed point. for (int relax = 0; relax < 2; relax++) { - TopoQueue new_shapes(graph_topo_order); + TopoQueue new_shapes(topo_order); // Seed the propagation of shapes through merge nodes. if (relax) { - for (const Node* node : merge_nodes) { + for (const NodeDef* node : merge_nodes) { new_shapes.push(node); } } // Also seed the propagation of shapes in the fanout of primary inputs. - for (const Node* node : primary_inputs) { + for (const NodeDef* node : primary_inputs) { new_shapes.push(node); } // Also seed the propagation of shapes in the fanout of fed nodes. - for (const Node* node : fed_nodes) { + for (const NodeDef* node : fed_nodes) { new_shapes.push(node); } // Propagate shapes normally. @@ -1242,14 +1233,14 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) { // Track shapes globally across the graph. SymbolicShapeManager shape_manager; bool found_error = false; - for (const Node* const node : graph.nodes()) { - auto node_ctx = refiner.GetContext(node); + for (const NodeDef& node : item_.graph.node()) { + auto node_ctx = refiner.GetContext(&node); if (!node_ctx) { continue; } // Skip any information that comes from fed nodes. 
- if (fed_ports.find(node->name()) != fed_ports.end()) { - VLOG(2) << "Skipping feed node shape: " << node->name(); + if (fed_ports.find(node.name()) != fed_ports.end()) { + VLOG(2) << "Skipping feed node shape: " << node.name(); continue; } for (const auto& merged_shapes : node_ctx->MergedShapes()) { @@ -1273,61 +1264,56 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) { } } - for (const Node* const node : graph.nodes()) { - VLOG(3) << "Filling in graph properties for node: " << node->name(); - auto ctx = refiner.GetContext(node); + for (const NodeDef& node : item_.graph.node()) { + VLOG(3) << "Filling in graph properties for node: " << node.name(); + auto ctx = refiner.GetNodeContext(&node); if (!ctx) { continue; } // Fill input properties. { - CHECK_EQ(ctx->num_inputs(), node->num_inputs()); - auto& input_properties = input_properties_[node->name()]; + // CHECK_EQ(ctx->num_inputs(), node.num_inputs()); + auto& input_properties = input_properties_[node.name()]; // Should always be empty, node names in graph are supposed to be unique. 
CHECK_EQ(input_properties.size(), 0); - input_properties.resize(ctx->num_inputs()); - for (int i = 0; i < ctx->num_inputs(); ++i) { - shape_manager.AsTensorProperties(ctx->input(i), node->input_type(i), + input_properties.resize(ctx->inference_context->num_inputs()); + GraphView::InputPort input(&node, -1); + for (int i = 0; i < ctx->inference_context->num_inputs(); ++i) { + shape_manager.AsTensorProperties(ctx->inference_context->input(i), + ctx->input_types[i], &input_properties[i]); - } - for (const auto& edge : node->in_edges()) { - if (edge->IsControlEdge()) { - continue; - } - if (!edge->src()->IsConstant()) { - continue; - } - const int input_id = edge->dst_input(); - if (input_id >= input_properties.size()) { + input.port_id = i; + GraphView::OutputPort fanin = graph_view.GetRegularFanin(input); + if (!IsConstant(*fanin.node)) { continue; } - const NodeDef& node = edge->src()->def(); - const TensorProto& raw_val = node.attr().at("value").tensor(); - *input_properties[input_id].mutable_value() = raw_val; + const TensorProto& raw_val = fanin.node->attr().at("value").tensor(); + *input_properties[i].mutable_value() = raw_val; } } // Fill output properties. { - CHECK_EQ(ctx->num_outputs(), node->num_outputs()); - auto& output_properties = output_properties_[node->name()]; + // CHECK_EQ(ctx->num_outputs(), node->num_outputs()); + auto& output_properties = output_properties_[node.name()]; // Should always be empty, node names in graph are supposed to be unique. 
CHECK_EQ(output_properties.size(), 0); - output_properties.resize(ctx->num_outputs()); - for (int i = 0; i < ctx->num_outputs(); ++i) { - shape_manager.AsTensorProperties(ctx->output(i), node->output_type(i), + output_properties.resize(ctx->inference_context->num_outputs()); + for (int i = 0; i < ctx->inference_context->num_outputs(); ++i) { + shape_manager.AsTensorProperties(ctx->inference_context->output(i), + ctx->output_types[i], &output_properties[i]); } } } // Help trace the unknown dimensions to their origins. - VerboseLogUnknownDimensionSources(graph, input_properties_, + VerboseLogUnknownDimensionSources(item_.graph, input_properties_, output_properties_); return Status::OK(); diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index a4e3031db1..485324c466 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -24,7 +24,6 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" namespace tensorflow { -class Graph; namespace grappler { @@ -79,40 +78,41 @@ class GraphProperties { // Merges shapes , determined from an EnqueueV2 node, into // <*queue_shapes_and_types>. static Status MergeEnqueueShapesAndTypes( - SymbolicShapeRefiner* shape_refiner, const Node* qnode, + SymbolicShapeRefiner* shape_refiner, const NodeDef* qnode, const std::vector& shapes_and_types, std::vector* queue_shapes_and_types); // Relaxes shapes , determined from an EnqueueV2 node, into // <*queue_shapes_and_types>. static Status RelaxEnqueueShapesAndMergeTypes( - SymbolicShapeRefiner* shape_refiner, const Node* qnode, + SymbolicShapeRefiner* shape_refiner, const NodeDef* qnode, const std::vector& shapes_and_types, std::vector* queue_shapes_and_types); // Update the shapes for qnode. If output shapes of qnode have changed, // enqueue its fanout in 'new_shapes'. 
static Status UpdateResource( - const Node* qnode, const std::unordered_set& queue_inputs, + const NodeDef* qnode, + const std::unordered_set& queue_inputs, SymbolicShapeRefiner* shape_refiner, TopoQueue* new_shapes); // Update the output shapes of a Merge node, and enqueue its fanout in // new_shapes if needed. - Status UpdateMergeNode(SymbolicShapeRefiner* shape_refiner, const Node* node, - bool relax, bool* new_shapes) const; + Status UpdateMergeNode(SymbolicShapeRefiner* shape_refiner, + const NodeDef* node, bool relax, + bool* new_shapes) const; // Process the Enter node, and enqueue its fanout in new_shapes if needed. static Status UpdateEnter(SymbolicShapeRefiner* shape_refiner, - const Node* node, bool relax, bool* new_shapes); + const NodeDef* node, bool relax, bool* new_shapes); // Update the shapes for node 'n'. If output shapes for n have changed, // enqueue its fanout in 'new_shapes'. - Status UpdateShapes( - SymbolicShapeRefiner* shape_refiner, bool relax, - const Node* n, bool* new_shapes) const; + Status UpdateShapes(SymbolicShapeRefiner* shape_refiner, bool relax, + const NodeDef* n, bool* new_shapes) const; // Propagate the shapes for the nodes enqueued in new_shapes and their // transitive fanout until a fixed point is reached. 
Status PropagateShapes( SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, - const std::unordered_map>& - resources, + const std::unordered_map>& resources, int num_loops) const; // Data members @@ -120,8 +120,6 @@ class GraphProperties { std::map> input_properties_; std::map> output_properties_; const std::vector missing_properties_; - - Graph* graph_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index 3de697bd37..afe334dfa2 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/cc/framework/scope.h" #include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/graph_def_util.h" #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -955,6 +956,11 @@ TEST_F(GraphPropertiesTest, Performance) { string filename = io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPath, "large_graph.pbtxt.html"); TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph)); + TF_CHECK_OK(AddDefaultAttrsToGraphDef( + &item.graph, + FunctionLibraryDefinition(OpRegistry::Global(), item.graph.library()), 0, + true)); + GraphProperties properties(item); TF_CHECK_OK(properties.InferStatically(false)); } diff --git a/tensorflow/core/grappler/graph_view.cc b/tensorflow/core/grappler/graph_view.cc index 0d3f94854b..3e448216f9 100644 --- a/tensorflow/core/grappler/graph_view.cc +++ b/tensorflow/core/grappler/graph_view.cc @@ -173,5 +173,54 @@ int GraphView::NumFanins(const NodeDef& node, return count; } +std::unordered_set +GraphView::GetFanoutEdges(const NodeDef& node, + bool include_controlled_edges) const { + std::unordered_set 
result; + OutputPort port; + port.node = const_cast(&node); + const int first_port_id = include_controlled_edges ? -1 : 0; + auto it = num_regular_outputs_.find(&node); + const int last_port_id = (it != num_regular_outputs_.end()) ? it->second : -1; + + for (int i = first_port_id; i <= last_port_id; ++i) { + port.port_id = i; + auto it = fanouts_.find(port); + if (it != fanouts_.end()) { + Edge fanout; + fanout.src.node = const_cast(&node); + fanout.src.port_id = i; + for (auto itr = it->second.begin(); itr != it->second.end(); ++itr) { + fanout.tgt = *itr; + result.insert(fanout); + } + } + } + return result; +} + +std::unordered_set +GraphView::GetFaninEdges(const NodeDef& node, + bool include_controlling_edges) const { + std::unordered_set result; + for (int i = 0; i < node.input_size(); ++i) { + Edge fanin; + fanin.tgt.node = const_cast(&node); + fanin.tgt.port_id = i; + string fanin_name = ParseNodeName(node.input(i), &fanin.src.port_id); + if (fanin.src.port_id < 0) { + if (!include_controlling_edges) { + break; + } + } + auto it = nodes_.find(fanin_name); + if (it != nodes_.end()) { + fanin.src.node = it->second; + result.insert(fanin); + } + } + return result; +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/graph_view.h b/tensorflow/core/grappler/graph_view.h index 173ce9c09c..c3baad0987 100644 --- a/tensorflow/core/grappler/graph_view.h +++ b/tensorflow/core/grappler/graph_view.h @@ -29,6 +29,8 @@ namespace grappler { class GraphView { public: struct Port { + Port() : node(nullptr), port_id(-1) {} + Port(NodeDef* n, int port) : node(n), port_id(port) {} NodeDef* node = nullptr; int port_id = -1; @@ -36,8 +38,16 @@ class GraphView { return node == other.node && port_id == other.port_id; } }; - struct InputPort : public Port {}; - struct OutputPort : public Port {}; + struct InputPort : public Port { + InputPort() = default; + InputPort(NodeDef* n, int port_id) : Port(n, port_id) {} + InputPort(const 
NodeDef* n, int port_id) + : Port(const_cast(n), port_id) {} + }; + struct OutputPort : public Port { + OutputPort() = default; + OutputPort(NodeDef* n, int port_id) : Port(n, port_id) {} + }; struct HashPort { std::size_t operator()(const Port& port) const { @@ -45,6 +55,20 @@ class GraphView { } }; + struct Edge { + OutputPort src; + InputPort tgt; + + bool operator==(const Edge& other) const { + return src == other.src && tgt == other.tgt; + } + }; + struct HashEdge { + std::size_t operator()(const Edge& edge) const { + return HashPort()(edge.src) + HashPort()(edge.tgt); + } + }; + explicit GraphView(GraphDef* graph); GraphDef* GetGraph() const { return graph_; } NodeDef* GetNode(const string& node_name) const; @@ -63,6 +87,7 @@ class GraphView { const OutputPort& port) const; std::unordered_set GetFanin( const InputPort& port) const; + // Special case: regular (i.e. non-control) input ports can only have one // fanin. const OutputPort GetRegularFanin(const InputPort& port) const; @@ -79,6 +104,13 @@ class GraphView { // controlling nodes iff include_controlling_nodes is true. int NumFanins(const NodeDef& node, bool include_controlling_nodes) const; + // Get all the edge in the immediate fanout (resp fanin) of a node. Include + // the control edges iff include_controlling_edges is true. + std::unordered_set GetFanoutEdges( + const NodeDef& node, bool include_controlled_edges) const; + std::unordered_set GetFaninEdges( + const NodeDef& node, bool include_controlling_edges) const; + private: GraphDef* graph_; std::unordered_map nodes_; -- GitLab From 3624fe7d063f8fa6fe5bd864ced291f520c54cdd Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Tue, 24 Apr 2018 14:42:07 -0700 Subject: [PATCH 228/434] Invalidate the StatCache as well as the FileBlockCache, as once the file is overwritten or removed, the stat will become outdated. 
PiperOrigin-RevId: 194148397 --- .../core/platform/cloud/expiring_lru_cache.h | 18 +++++++ .../platform/cloud/expiring_lru_cache_test.cc | 17 +++++++ .../core/platform/cloud/gcs_file_system.cc | 19 ++++--- .../core/platform/cloud/gcs_file_system.h | 3 ++ .../platform/cloud/gcs_file_system_test.cc | 50 +++++++++++++++++++ 5 files changed, 100 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/platform/cloud/expiring_lru_cache.h b/tensorflow/core/platform/cloud/expiring_lru_cache.h index c738497ddd..e2d048f141 100644 --- a/tensorflow/core/platform/cloud/expiring_lru_cache.h +++ b/tensorflow/core/platform/cloud/expiring_lru_cache.h @@ -51,6 +51,14 @@ class ExpiringLRUCache { InsertLocked(key, value); } + // Delete the entry with key `key`. Return true if the entry was found for + // `key`, false if the entry was not found. In both cases, there is no entry + // with key `key` existed after the call. + bool Delete(const string& key) { + mutex_lock lock(mu_); + return DeleteLocked(key); + } + /// Look up the entry with key `key` and copy it to `value` if found. Returns /// true if an entry was found for `key`, and its timestamp is not more than /// max_age_ seconds in the past. @@ -141,6 +149,16 @@ class ExpiringLRUCache { } } + bool DeleteLocked(const string& key) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + auto it = cache_.find(key); + if (it == cache_.end()) { + return false; + } + lru_list_.erase(it->second.lru_iterator); + cache_.erase(it); + return true; + } + /// The maximum age of entries in the cache, in seconds. A value of 0 means /// that no entry is ever placed in the cache. 
const uint64 max_age_; diff --git a/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc b/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc index 3bc6db3842..42879e80a9 100644 --- a/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc +++ b/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc @@ -174,5 +174,22 @@ TEST(ExpiringLRUCacheTest, Clear) { EXPECT_FALSE(cache.Lookup("d", &value)); } +TEST(ExpiringLRUCacheTest, Delete) { + // Insert an entry. + ExpiringLRUCache cache(1, 4); + cache.Insert("a", 1); + int value = 0; + EXPECT_TRUE(cache.Lookup("a", &value)); + EXPECT_EQ(value, 1); + + // Delete the entry. + EXPECT_TRUE(cache.Delete("a")); + EXPECT_FALSE(cache.Lookup("a", &value)); + + // Try deleting the entry again. + EXPECT_FALSE(cache.Delete("a")); + EXPECT_FALSE(cache.Lookup("a", &value)); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index f0003fa784..2d9c99c124 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -857,14 +857,20 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset, return Status::OK(); } +void GcsFileSystem::ClearFileCaches(const string& fname) { + file_block_cache_->RemoveFile(fname); + stat_cache_->Delete(fname); + // TODO(rxsang): Remove the patterns that matche the file in + // MatchingPathsCache as well. 
+} + Status GcsFileSystem::NewWritableFile(const string& fname, std::unique_ptr* result) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); - result->reset(new GcsWritableFile( - bucket, object, this, &timeouts_, - [this, fname]() { file_block_cache_->RemoveFile(fname); }, - initial_retry_delay_usec_)); + result->reset(new GcsWritableFile(bucket, object, this, &timeouts_, + [this, fname]() { ClearFileCaches(fname); }, + initial_retry_delay_usec_)); return Status::OK(); } @@ -904,8 +910,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname, TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); result->reset(new GcsWritableFile( bucket, object, this, old_content_filename, &timeouts_, - [this, fname]() { file_block_cache_->RemoveFile(fname); }, - initial_retry_delay_usec_)); + [this, fname]() { ClearFileCaches(fname); }, initial_retry_delay_usec_)); return Status::OK(); } @@ -1277,7 +1282,7 @@ Status GcsFileSystem::DeleteFile(const string& fname) { request->SetDeleteRequest(); TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when deleting ", fname); - file_block_cache_->RemoveFile(fname); + ClearFileCaches(fname); return Status::OK(); } diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index 703c8d5778..99c94c1751 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -227,6 +227,9 @@ class GcsFileSystem : public FileSystem { Status LoadBufferFromGCS(const string& filename, size_t offset, size_t n, char* buffer, size_t* bytes_transferred); + // Clear all the caches related to the file with name `filename`. 
+ void ClearFileCaches(const string& fname); + std::unique_ptr auth_provider_; std::unique_ptr http_request_factory_; std::unique_ptr file_block_cache_; diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index ca4b7722b6..c639299954 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -1551,6 +1551,56 @@ TEST(GcsFileSystemTest, DeleteFile_NoObjectName) { fs.DeleteFile("gs://bucket/").code()); } +TEST(GcsFileSystemTest, DeleteFile_StatCacheRemoved) { + std::vector requests( + {new FakeHttpRequest( + "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" + "file.txt?fields=size%2Cupdated\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n", + strings::StrCat("{\"size\": \"1010\"," + "\"updated\": \"2016-04-29T23:15:24.896Z\"}")), + new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b" + "/bucket/o/file.txt\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n" + "Delete: yes\n", + ""), + new FakeHttpRequest( + "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" + "file.txt?fields=size%2Cupdated\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n", + "", errors::NotFound("404"), 404), + new FakeHttpRequest( + "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?" + "fields=items%2Fname%2CnextPageToken&prefix=file.txt%2F" + "&maxResults=1\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n", + "{}")}); + GcsFileSystem fs( + std::unique_ptr(new FakeAuthProvider), + std::unique_ptr( + new FakeHttpRequestFactory(&requests)), + 16 /* block size */, 16 /* max bytes */, 0 /* max staleness */, + 3600 /* stat cache max age */, 0 /* stat cache max entries */, + 0 /* matching paths cache max age */, + 0 /* matching paths cache max entries */, 0 /* initial retry delay*/, + kTestTimeoutConfig, nullptr /* gcs additional header */); + + // Stats the file first so the stat is cached. 
+ FileStatistics stat_before_deletion; + TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat_before_deletion)); + EXPECT_EQ(1010, stat_before_deletion.length); + + TF_EXPECT_OK(fs.DeleteFile("gs://bucket/file.txt")); + + FileStatistics stat_after_deletion; + EXPECT_EQ(error::Code::NOT_FOUND, + fs.Stat("gs://bucket/file.txt", &stat_after_deletion).code()); +} + TEST(GcsFileSystemTest, DeleteDir_Empty) { std::vector requests({new FakeHttpRequest( "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?" -- GitLab From 03005b129691bf6db8cf8c8c5a82be70ac79571c Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Tue, 24 Apr 2018 14:52:38 -0700 Subject: [PATCH 229/434] docs: install_linux, move GPU section below install procedures. --- tensorflow/docs_src/install/install_linux.md | 198 +++++++++---------- 1 file changed, 98 insertions(+), 100 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index fa82ac9c40..c66d50c3cb 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -1,106 +1,25 @@ # Installing TensorFlow on Ubuntu -This guide explains how to install TensorFlow on Ubuntu. Although these -instructions might also work on other Linux variants, we have only -tested (and we only support) these instructions on machines meeting the -following requirements: +This guide explains how to install TensorFlow on Ubuntu Linux. While these +instructions may work on other Linux variants, they are tested and supported with +the following system requirements: - * 64-bit desktops or laptops - * Ubuntu 16.04 or higher +* 64-bit desktops or laptops +* Ubuntu 16.04 or higher -## Determine which TensorFlow to install +## Choose which TensorFlow to install -You must choose one of the following types of TensorFlow to install: +The following TensorFlow variants are available for installation: - * **TensorFlow with CPU support only**. 
If your system does not have a - NVIDIA® GPU, you must install this version. Note that this version of - TensorFlow is typically much easier to install (typically, - in 5 or 10 minutes), so even if you have an NVIDIA GPU, we recommend - installing this version first. - * **TensorFlow with GPU support**. TensorFlow programs typically run - significantly faster on a GPU than on a CPU. Therefore, if your - system has a NVIDIA® GPU meeting the prerequisites shown below and you - need to run performance-critical applications, you should ultimately - install this version. - - -### NVIDIA requirements to run TensorFlow with GPU support - -If you are installing TensorFlow with GPU support using one of the -mechanisms described in this guide, then the following NVIDIA software -must be installed on your system: - - * [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see - [NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/). - Ensure that you append the relevant CUDA pathnames to the - `LD_LIBRARY_PATH` environment variable as described in the - NVIDIA documentation. - * [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see - [NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/). - Ensure that you create the `CUDA_HOME` environment variable as - described in the NVIDIA documentation. - * GPU card with CUDA Compute Capability 3.0 or higher for building - from source and 3.5 or higher for our binaries. See - [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for - a list of supported GPU cards. - * [GPU drivers](http://nvidia.com/driver) supporting your version of the CUDA - Toolkit. - * The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface. - This library provides advanced profiling support. To install this library, - issue the following command for CUDA Toolkit >= 8.0: - -
-    $ sudo apt-get install cuda-command-line-tools
-    
- - and add its path to your `LD_LIBRARY_PATH` environment variable: - -
-    $ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64
-    
- - For CUDA Toolkit <= 7.5 do: - -
-    $ sudo apt-get install libcupti-dev
-    
- - * **[OPTIONAL]** For optimized inferencing performance, you can also install - **NVIDIA TensorRT 3.0**. The minimal set of TensorRT runtime components needed - for use with the pre-built `tensorflow-gpu` package can be installed as follows: - -
-    $ wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb
-    $ sudo dpkg -i nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb
-    $ sudo apt-get update
-    $ sudo apt-get install -y --allow-downgrades libnvinfer-dev libcudnn7-dev=7.0.5.15-1+cuda9.0 libcudnn7=7.0.5.15-1+cuda9.0
-    
- - **IMPORTANT:** For compatibility with the pre-built `tensorflow-gpu` - package, please use the Ubuntu **14.04** package of TensorRT as shown above, - even when installing onto an Ubuntu 16.04 system.
-
- To build the TensorFlow-TensorRT integration module from source rather than - using pre-built binaries, see the [module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#using-tensorrt-in-tensorflow). - For detailed TensorRT installation instructions, see [NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html).
-
- To avoid cuDNN version conflicts during later system upgrades, you can hold - the cuDNN version at 7.0.5: - -
-    $  sudo apt-mark hold libcudnn7 libcudnn7-dev
-    
- - To later allow upgrades, you can remove the hold: - -
-    $  sudo apt-mark unhold libcudnn7 libcudnn7-dev
-    
- -If you have an earlier version of the preceding packages, please upgrade to -the specified versions. If upgrading is not possible, then you may still run -TensorFlow with GPU support, if you @{$install_sources$install TensorFlow from Sources}. +* __TensorFlow with CPU support only__. If your system does not have a + NVIDIA® GPU, you must install this version. This version of TensorFlow is + usually easier to install, so even if you have an NVIDIA GPU, we recommend + installing this version first. +* __TensorFlow with GPU support__. TensorFlow programs usually run much faster on + a GPU instead of a CPU. If you run performance-critical applications and your + system has an NVIDIA® GPU that meets the prerequisites, you should install + this version. See [TensorFlow GPU support](#NVIDIARequirements) for details. ## How to install TensorFlow @@ -131,8 +50,8 @@ On Ubuntu, Python is automatically installed and `pip` is *usually* installed. Confirm the `python` and `pip` versions:
-  python -V
-  pip -V  # or: pip3 -V
+  python -V  # or: python3 -V
+  pip -V     # or: pip3 -V
 
To install these packages on Ubuntu: @@ -264,8 +183,8 @@ On Ubuntu, Python is automatically installed and `pip` is *usually* installed. Confirm the `python` and `pip` versions:
-  python -V
-  pip -V  # or: pip3 -V
+  python -V  # or: python3 -V
+  pip -V     # or: pip3 -V
 
To install these packages on Ubuntu: @@ -578,6 +497,85 @@ If you are new to machine learning, we recommend the following: * @{$get_started/eager} + +## TensorFlow GPU support + +To install TensorFlow with GPU support, configure the following NVIDIA® software +on your system: + +* [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see + [NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/). + Append the relevant CUDA pathnames to the `LD_LIBRARY_PATH` environmental + variable as described in the NVIDIA documentation. +* [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see + [NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/). + Create the `CUDA_HOME` environment variable as described in the NVIDIA + documentation. +* A GPU card with CUDA Compute Capability 3.0 or higher for building TensorFlow + from source. To use the TensorFlow binaries, version 3.5 or higher is required. + See the [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a + list of supported GPU cards. +* [GPU drivers](http://nvidia.com/driver) that support your version of the CUDA + Toolkit. +* The `libcupti-dev` library is the NVIDIA CUDA Profile Tools Interface. This + library provides advanced profiling support. To install this library, + use the following command for CUDA Toolkit >= 8.0: + +
+  sudo apt-get install cuda-command-line-tools
+
+ +Add this path to the `LD_LIBRARY_PATH` environmental variable: + +
+  export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64
+
+ +For CUDA Toolkit <= 7.5 use: + +
+  sudo apt-get install libcupti-dev
+
+ +* *OPTIONAL*: For optimized performance during inference, install + *NVIDIA TensorRT 3.0*. To install the minimal amount of TensorRT + runtime components required to use with the pre-built `tensorflow-gpu` package: + +
+  wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb
+  sudo dpkg -i nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb
+  sudo apt-get update
+  sudo apt-get install -y --allow-downgrades libnvinfer-dev libcudnn7-dev=7.0.5.15-1+cuda9.0 libcudnn7=7.0.5.15-1+cuda9.0
+
+ +Note: For compatibility with the pre-built `tensorflow-gpu` package, use the +Ubuntu *14.04* package of TensorRT (shown above). Use this even when installing +on an Ubuntu 16.04 system. + +To build the TensorFlow-TensorRT integration module from source instead of using +the pre-built binaries, see the +[module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#using-tensorrt-in-tensorflow). +For detailed TensorRT installation instructions, see +[NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html). + +To avoid cuDNN version conflicts during later system upgrades, hold the cuDNN +version at 7.0.5: + +
+  sudo apt-mark hold libcudnn7 libcudnn7-dev
+
+ +To allow upgrades, remove this hold: + +
+  sudo apt-mark unhold libcudnn7 libcudnn7-dev
+
+ +If you have an earlier version of the preceding packages, upgrade to the +specified versions. If upgrading is not possible, you can still run TensorFlow +with GPU support by @{$install_sources}. + + ## Common installation problems We are relying on Stack Overflow to document TensorFlow installation problems -- GitLab From 184c8306a4a3d41f42f077b4898933500d61ce86 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Tue, 24 Apr 2018 14:52:59 -0700 Subject: [PATCH 230/434] Add deprecation notice to replicate_model_fn. PiperOrigin-RevId: 194150426 --- tensorflow/contrib/estimator/BUILD | 1 + .../estimator/python/estimator/replicate_model_fn.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 62ddb3d290..b473de86ee 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -367,6 +367,7 @@ py_library( "//tensorflow/python:sparse_tensor", "//tensorflow/python:state_ops", "//tensorflow/python:training", + "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python/estimator:export_output", "//tensorflow/python/estimator:model_fn", diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index a8774d6dab..f8564446e5 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -47,8 +47,12 @@ from tensorflow.python.ops.losses import losses from tensorflow.python.platform import tf_logging from tensorflow.python.training import device_setter as device_setter_lib from tensorflow.python.training import optimizer as optimizer_lib +from tensorflow.python.util import deprecation +@deprecation.deprecated( + '2018-05-31', + 'Please use `tf.contrib.distribute.MirroredStrategy` instead.') def replicate_model_fn(model_fn, 
loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS, devices=None): @@ -255,6 +259,9 @@ class TowerOptimizer(optimizer_lib.Optimizer): COLLECTION_FOR_GRAPH_STATES = 'replicate_model_fn_graph_states' + @deprecation.deprecated( + '2018-05-31', + 'Please use `tf.contrib.distribute.MirroredStrategy` instead.') def __init__(self, optimizer_or_optimizer_fn): """Wrap an existing optimizer for gathering gradients across towers. -- GitLab From c13af7d5a2bde4cedd28336e688f15d9bc0d886c Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Tue, 24 Apr 2018 14:55:47 -0700 Subject: [PATCH 231/434] Fix a bug where string::substr is used with wrong position. --- .../contrib/tensorrt/convert/convert_graph.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index b412b296e0..0774027711 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -111,20 +111,22 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, } } -std::pair ParseTensorName(string name, int default_idx = 0) { +std::pair ParseTensorName(const string& name, + int default_idx = 0) { + string name_no_idx = name; int idx = default_idx; - size_t sep = name.find_last_of(':'); + const size_t sep = name_no_idx.find_last_of(':'); if (sep != string::npos) { - name = name.substr(0, sep); + name_no_idx = name_no_idx.substr(0, sep); idx = std::stoi(name.substr(sep + 1)); } - return std::make_pair(name, idx); + return std::make_pair(name_no_idx, idx); } std::unordered_map> BuildTensorNameMap( const std::vector& tensor_names) { std::unordered_map> result; - for (string const& tensor_name : tensor_names) { + for (const string& tensor_name : tensor_names) { string node_name; int index; std::tie(node_name, index) = ParseTensorName(tensor_name); @@ -132,6 +134,7 @@ 
std::unordered_map> BuildTensorNameMap( } return result; } + // TODO(sami): convert references to pointers struct ConvertGraphParams { ConvertGraphParams( -- GitLab From e7db82f821a1c522eed9e0c633df8b3db26ef38d Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 24 Apr 2018 15:45:50 -0700 Subject: [PATCH 232/434] Make TF functions work with _USE_C_SHAPES=True. It turns out regular functions need to manually copy handle data in addition to eager GraphModeFunctions, so I moved the C extensions to python_api.h from eager/c_api.h. This also cleans up function_test.py to assume the C API is enabled. PiperOrigin-RevId: 194158700 --- tensorflow/c/eager/BUILD | 2 - tensorflow/c/eager/c_api.cc | 57 ------------------- tensorflow/c/eager/c_api.h | 14 ----- tensorflow/c/python_api.cc | 28 ++++++++- tensorflow/c/python_api.h | 12 +++- tensorflow/python/client/tf_session.i | 2 +- tensorflow/python/eager/function.py | 2 +- tensorflow/python/framework/function.py | 10 +++- tensorflow/python/framework/function_test.py | 37 +++--------- tensorflow/python/framework/ops.py | 4 +- .../python/ops/resource_variable_ops.py | 9 +-- tensorflow/python/pywrap_tfe.i | 2 - 12 files changed, 59 insertions(+), 120 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index fae922ea3b..1432119162 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -40,8 +40,6 @@ tf_cuda_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", - # TODO(b/74620627): move this here - "//tensorflow/python:cpp_shape_inference_proto_cc", ], }) + select({ "//tensorflow:with_xla_support": [ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 975bde7c7f..3bf071f3ab 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -48,7 +48,6 @@ limitations under the License. 
#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/public/version.h" -#include "tensorflow/python/framework/cpp_shape_inference.pb.h" using tensorflow::int64; using tensorflow::string; @@ -503,62 +502,6 @@ void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf, ctx->context.RunMetadataProto()->Clear(); } -void TFE_GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output, - TF_Buffer* output_proto, - TF_Status* status) { - tensorflow::Node* node = &output.oper->node; - tensorflow::CppShapeInferenceResult::HandleData handle_data; - handle_data.set_is_set(true); - { - tensorflow::mutex_lock l(graph->mu); - tensorflow::shape_inference::InferenceContext* ic = - graph->refiner.GetContext(node); - CHECK(ic != nullptr); - CHECK_LT(output.index, ic->num_outputs()); - const auto* shapes_and_types = - ic->output_handle_shapes_and_types(output.index); - if (shapes_and_types == nullptr) { - output_proto->data = nullptr; - output_proto->length = 0; - output_proto->data_deallocator = nullptr; - return; - } - - for (const auto& p : *shapes_and_types) { - auto* out_shape_and_type = handle_data.add_shape_and_type(); - ic->ShapeHandleToProto(p.shape, out_shape_and_type->mutable_shape()); - out_shape_and_type->set_dtype(p.dtype); - } - } - status->status = MessageToBuffer(handle_data, output_proto); -} - -void TFE_SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output, - const void* proto, size_t proto_len, - TF_Status* status) { - tensorflow::CppShapeInferenceResult::HandleData handle_data; - if (!handle_data.ParseFromArray(proto, proto_len)) { - status->status = tensorflow::errors::InvalidArgument( - "Couldn't deserialize HandleData proto"); - return; - } - DCHECK(handle_data.is_set()); - - tensorflow::mutex_lock l(graph->mu); - tensorflow::shape_inference::InferenceContext* ic = - graph->refiner.GetContext(&output.oper->node); - - std::vector shapes_and_types; - for (const auto& 
shape_and_type_proto : handle_data.shape_and_type()) { - tensorflow::shape_inference::ShapeHandle shape; - status->status = - ic->MakeShapeFromShapeProto(shape_and_type_proto.shape(), &shape); - if (status->status.ok()) return; - shapes_and_types.emplace_back(shape, shape_and_type_proto.dtype()); - } - ic->set_output_handle_shapes_and_types(output.index, shapes_and_types); -} - namespace { TFE_Op* GetFunc(TFE_Context* ctx, const tensorflow::NameAttrList& func, TF_Status* status) { diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index ba77f3cd07..c06ce84a8c 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -329,20 +329,6 @@ TF_CAPI_EXPORT extern void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf, TF_Status* status); -// Returns the serialized CppShapeInferenceResult::HandleData proto for -// `output` if its a resource tensor, or otherwise returns an empty buffer. -TF_CAPI_EXPORT extern void TFE_GetResourceHandleShapeAndType( - TF_Graph* graph, TF_Output output, TF_Buffer* output_proto, - TF_Status* status); - -// Sets `output` based on `proto`, which should be a serialized -// CppShapeInferenceResult::HandleData proto. 
-TF_CAPI_EXPORT extern void TFE_SetResourceHandleShapeAndType(TF_Graph* graph, - TF_Output output, - const void* proto, - size_t proto_len, - TF_Status* status); - #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index 93155998b8..e18fdf6c57 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -110,7 +110,7 @@ void ExtendSession(TF_Session* session, TF_Status* status) { session->extend_before_run = false; } -std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) { +std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) { Node* node = &output.oper->node; CppShapeInferenceResult::HandleData handle_data; handle_data.set_is_set(true); @@ -135,4 +135,30 @@ std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) { return result; } +void SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output, + const void* proto, size_t proto_len, + TF_Status* status) { + tensorflow::CppShapeInferenceResult::HandleData handle_data; + if (!handle_data.ParseFromArray(proto, proto_len)) { + status->status = tensorflow::errors::InvalidArgument( + "Couldn't deserialize HandleData proto"); + return; + } + DCHECK(handle_data.is_set()); + + tensorflow::mutex_lock l(graph->mu); + tensorflow::shape_inference::InferenceContext* ic = + graph->refiner.GetContext(&output.oper->node); + + std::vector shapes_and_types; + for (const auto& shape_and_type_proto : handle_data.shape_and_type()) { + tensorflow::shape_inference::ShapeHandle shape; + status->status = + ic->MakeShapeFromShapeProto(shape_and_type_proto.shape(), &shape); + if (!status->status.ok()) return; // Bail out on the first invalid shape. + shapes_and_types.emplace_back(shape, shape_and_type_proto.dtype()); + } + ic->set_output_handle_shapes_and_types(output.index, shapes_and_types); +} + } // namespace tensorflow diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h index 2d4c8cd9ed..4bcb5bde62 100644 ---
a/tensorflow/c/python_api.h +++ b/tensorflow/c/python_api.h @@ -55,9 +55,15 @@ void ExtendSession(TF_Session* session, TF_Status* status); // Returns the serialized CppShapeInferenceResult::HandleData proto for // `output` if its a resource tensor, or otherwise returns the empty string. -// TODO(b/74620627): remove when _USE_C_SHAPES is removed -std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output); - +std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output); + +// Sets `output` based on `proto`, which should be a serialized +// CppShapeInferenceResult::HandleData proto. +// NOTE(skyewm): `proto` is passed a void*/size_t pair instead of a std::string +// because I couldn't get SWIG to work otherwise. +void SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output, + const void* proto, size_t proto_len, + TF_Status* status); } // namespace tensorflow #endif // TENSORFLOW_C_PYTHON_API_H_ diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index b82182d5d3..1db1432d65 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -458,7 +458,7 @@ TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper{ } // Override default py3 behavior of attempting to encode into Unicode. 
-%typemap(out) std::string tensorflow::ResourceHandleShapeAndType { +%typemap(out) std::string tensorflow::GetResourceHandleShapeAndType { $result = PyBytes_FromStringAndSize($1.data(), $1.size()); } diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index b924448abe..bdbbe864df 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -80,7 +80,7 @@ def capture_value(tensor_map, value, dtype, name): if handle_data is not None and handle_data.is_set: # pylint: disable=protected-access if ops._USE_C_SHAPES: - pywrap_tensorflow.TFE_SetResourceHandleShapeAndType( + pywrap_tensorflow.SetResourceHandleShapeAndType( captured_value.graph._c_graph, captured_value._as_tf_output(), handle_data.SerializeToString()) else: diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index 9570f009a5..f343edc483 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -703,7 +703,15 @@ class _FuncGraph(ops.Graph): with ops.control_dependencies(None): ph = array_ops.placeholder(tensor.dtype, shape=tensor.get_shape()) # pylint: disable=protected-access - ph._handle_data = tensor._handle_data + if ops._USE_C_SHAPES: + handle_data = c_api.GetResourceHandleShapeAndType(tensor.graph._c_graph, + tensor._as_tf_output()) + if handle_data: + c_api.SetResourceHandleShapeAndType(ph.graph._c_graph, + ph._as_tf_output(), + compat.as_bytes(handle_data)) + else: + ph._handle_data = tensor._handle_data # pylint: enable=protected-access self._captured[tensor] = ph self.extra_args.append(ph) diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index d6bc14fbc7..cfdacee54f 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -85,7 +85,7 @@ def _OptimizerOptions(): yield cfg -@test_util.with_c_api +@test_util.with_c_shapes class 
FunctionTest(test.TestCase): """Test methods for verifying Function support. @@ -431,7 +431,6 @@ class FunctionTest(test.TestCase): "assertion failed.*-3"): self.assertAllEqual(Foo(constant_op.constant(-3.0)).eval(), 6.0) - @test_util.disable_c_api # Op._add_control_inputs doesn't work with C API def testAssertWrapper(self): @function.Defun(dtypes.float32) @@ -446,7 +445,6 @@ class FunctionTest(test.TestCase): "assertion"): _ = MyFn(100.0).eval() - @test_util.disable_c_api # Op._add_control_inputs doesn't work with C API def testWhileLoopCallsFunc(self): with self.test_session(use_gpu=True) as sess: @@ -466,7 +464,6 @@ class FunctionTest(test.TestCase): ans = sess.run(loop) self.assertAllClose(ans, 131072.) - @test_util.disable_c_api # Op._add_control_inputs doesn't work with C API def testControlFlowStrictness(self): """Inlined functions must not execute in a untaken control flow branch.""" @@ -1054,7 +1051,7 @@ class FunctionTest(test.TestCase): self.assertEqual((42.0, 44.0), sess.run((f_0, f_1))) -@test_util.with_c_api +@test_util.with_c_shapes class FunctionsFromProtos(test.TestCase): def expectFunctionsEqual(self, func, grad_func=None, new_func=None): @@ -1256,7 +1253,7 @@ class FunctionsFromProtos(test.TestCase): FunctionWithAttr.definition.attr["experimental_tag"].s, b"tag_value") -@test_util.with_c_api +@test_util.with_c_shapes class FunctionOverloadTest(test.TestCase): def testBasic(self): @@ -1309,7 +1306,7 @@ class FunctionOverloadTest(test.TestCase): "Successor of x.") -@test_util.with_c_api +@test_util.with_c_shapes class FunctionCaptureByValueTest(test.TestCase): def testCaptureByValue(self): @@ -1339,7 +1336,7 @@ class FunctionCaptureByValueTest(test.TestCase): self.assertAllEqual(y.eval(), [[12.0]]) -@test_util.with_c_api +@test_util.with_c_shapes class UnrollLSTMTest(test.TestCase): BATCH_SIZE = 16 LSTM_DIMS = 32 @@ -1475,7 +1472,7 @@ class UnrollLSTMTest(test.TestCase): self.assertAllClose(d0, d3, rtol=1e-4, atol=1e-4) -@test_util.with_c_api 
+@test_util.with_c_shapes class FunctionInlineControlTest(test.TestCase): def testFoo(self): @@ -1543,10 +1540,6 @@ def Linear2(w1, b1, w2, b2, x): return Linear(w2, b2, Linear(w1, b1, x)) -# Set C API before defining module level functions -ops._USE_C_API = True - - @function.Defun(*[dtypes.float32] * 3) def LinearWithCApi(w, b, x): return nn_ops.relu(math_ops.matmul(x, w) + b) @@ -1557,25 +1550,9 @@ def Linear2WithCApi(w1, b1, w2, b2, x): return LinearWithCApi(w2, b2, LinearWithCApi(w1, b1, x)) -# Unset C API after defining module level functions -ops._USE_C_API = False - - class ModuleFunctionTest(test.TestCase): def testBasic(self): - with ops.Graph().as_default(): - a, b, c, d, e = [ - constant_op.constant([[_]], dtype=dtypes.float32) for _ in range(5) - ] - y = Linear(a, b, c) - z = Linear2(a, b, c, d, e) - with session.Session() as sess: - self.assertAllEqual([[1]], sess.run(y)) - self.assertAllEqual([[5]], sess.run(z)) - - @test_util.enable_c_api - def testBasicWithCApi(self): with ops.Graph().as_default(): a, b, c, d, e = [ constant_op.constant([[_]], dtype=dtypes.float32) for _ in range(5) @@ -1587,7 +1564,7 @@ class ModuleFunctionTest(test.TestCase): self.assertAllEqual([[5]], sess.run(z)) -@test_util.with_c_api +@test_util.with_c_shapes class VariableHoistingTest(test.TestCase): def _testSimpleModel(self, use_forward_func, use_resource=False): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 8cd6820f6a..16a8c575c6 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -2557,8 +2557,8 @@ def _set_shape_and_handle_data_for_outputs_c_api(op): output._shape_val = output._c_api_shape() # Set the resource handle data for compatibility with the Python shape # inference code. 
- serialized = c_api.ResourceHandleShapeAndType( - op._graph._c_graph, output._as_tf_output()) + serialized = c_api.GetResourceHandleShapeAndType(op._graph._c_graph, + output._as_tf_output()) if serialized: output._handle_data = ( cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 4d26b2f46e..1e953f658f 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -24,7 +24,6 @@ from tensorflow.core.framework import variable_pb2 from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import tape -from tensorflow.python.framework import c_api_util from tensorflow.python.framework import cpp_shape_inference_pb2 from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -47,13 +46,11 @@ def get_resource_handle_data(graph_op): assert ops._USE_C_SHAPES # pylint: disable=protected-access assert type(graph_op) == ops.Tensor # pylint: disable=unidiomatic-typecheck - with c_api_util.tf_buffer() as buf: - pywrap_tensorflow.TFE_GetResourceHandleShapeAndType( - graph_op.graph._c_graph, graph_op._as_tf_output(), buf) # pylint: disable=protected-access - data = pywrap_tensorflow.TF_GetBuffer(buf) + handle_data = pywrap_tensorflow.GetResourceHandleShapeAndType( + graph_op.graph._c_graph, graph_op._as_tf_output()) # pylint: disable=protected-access return cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData.FromString( - compat.as_bytes(data)) + compat.as_bytes(handle_data)) def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode): diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 0982a67dee..5ee55301df 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -59,8 +59,6 @@ limitations under the License. 
%rename("%s") TFE_ContextOptionsSetAsync; %rename("%s") TFE_DeleteContextOptions; %rename("%s") TFE_Py_TensorShapeSlice; -%rename("%s") TFE_GetResourceHandleShapeAndType; -%rename("%s") TFE_SetResourceHandleShapeAndType; %{ #include "tensorflow/python/eager/pywrap_tfe.h" -- GitLab From d85610e5d25b4a9150446841d659a17ae1673ddd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 15:49:53 -0700 Subject: [PATCH 233/434] Fix flaky timeouts in metric_ops_test by sharding more. PiperOrigin-RevId: 194159328 --- tensorflow/contrib/metrics/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/metrics/BUILD b/tensorflow/contrib/metrics/BUILD index 5ca42f41c1..e050f3c8d4 100644 --- a/tensorflow/contrib/metrics/BUILD +++ b/tensorflow/contrib/metrics/BUILD @@ -77,7 +77,7 @@ py_test( py_test( name = "metric_ops_test", srcs = ["python/ops/metric_ops_test.py"], - shard_count = 3, + shard_count = 8, srcs_version = "PY2AND3", tags = ["noasan"], # times out b/63678675 deps = [ -- GitLab From 29b23ba7afe79035eacf04886aa2636a093f12fb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 15:50:49 -0700 Subject: [PATCH 234/434] Add support for tensors to numpy array related assertion methods in test_util.TensorflowTestCase. 
PiperOrigin-RevId: 194159512 --- tensorflow/python/framework/test_util.py | 209 +++++++++++++++++- tensorflow/python/framework/test_util_test.py | 193 ++++++++++++++++ 2 files changed, 395 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 5a8bc43727..dc56d88066 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -21,6 +21,7 @@ from __future__ import print_function import contextlib import gc +import itertools import math import random import re @@ -1212,8 +1213,14 @@ class TensorFlowTestCase(googletest.TestCase): self.assertTrue(self._NDArrayNear(ndarray1, ndarray2, err), msg=msg) def _GetNdArray(self, a): + # If a is a tensor then convert it to ndarray + if isinstance(a, ops.Tensor): + if isinstance(a, ops._EagerTensorBase): + return a.numpy() + else: + a = self.evaluate(a) if not isinstance(a, np.ndarray): - a = np.array(a) + return np.array(a) return a def _assertArrayLikeAllClose(self, a, b, rtol=1e-6, atol=1e-6, msg=None): @@ -1286,8 +1293,8 @@ class TensorFlowTestCase(googletest.TestCase): # Try to directly compare a, b as ndarrays; if not work, then traverse # through the sequence, which is more expensive. try: - a_as_ndarray = np.array(a) - b_as_ndarray = np.array(b) + a_as_ndarray = self._GetNdArray(a) + b_as_ndarray = self._GetNdArray(b) self._assertArrayLikeAllClose( a_as_ndarray, b_as_ndarray, @@ -1322,16 +1329,18 @@ class TensorFlowTestCase(googletest.TestCase): raise def assertAllClose(self, a, b, rtol=1e-6, atol=1e-6, msg=None): - """Asserts that two structures of numpy arrays, have near values. + """Asserts that two structures of numpy arrays or Tensors, have near values. `a` and `b` can be arbitrarily nested structures. A layer of a nested structure can be a `dict`, `namedtuple`, `tuple` or `list`. 
Args: a: The expected numpy `ndarray`, or anything that can be converted into a - numpy `ndarray`, or any arbitrarily nested of structure of these. + numpy `ndarray` (including Tensor), or any arbitrarily nested + structure of these. b: The actual numpy `ndarray`, or anything that can be converted into a - numpy `ndarray`, or any arbitrarily nested of structure of these. + numpy `ndarray` (including Tensor), or any arbitrarily nested + structure of these. rtol: relative tolerance. atol: absolute tolerance. msg: Optional message to report on failure. @@ -1391,8 +1400,26 @@ class TensorFlowTestCase(googletest.TestCase): self.assertAllClose(a, b, rtol=rtol, atol=atol, msg=msg) + def assertNotAllClose(self, a, b, **kwargs): + """Assert that two numpy arrays, or Tensors, do not have near values. + + Args: + a: the first value to compare. + b: the second value to compare. + **kwargs: additional keyword arguments to be passed to the underlying + `assertAllClose` call. + + Raises: + AssertionError: If `a` and `b` are unexpectedly close at all elements. + """ + try: + self.assertAllClose(a, b, **kwargs) + except AssertionError: + return + raise AssertionError("The two values are close at all elements") + def assertAllEqual(self, a, b, msg=None): - """Asserts that two numpy arrays have the same values. + """Asserts that two numpy arrays or Tensors have the same values. Args: a: the expected numpy ndarray or anything can be converted to one. @@ -1424,6 +1451,174 @@ class TensorFlowTestCase(googletest.TestCase): print("not equal rhs = ", y) np.testing.assert_array_equal(a, b, err_msg=msg) + def assertAllGreater(self, a, comparison_target): + """Assert element values are all greater than a target value. + + Args: + a: The numpy `ndarray`, or anything that can be converted into a + numpy `ndarray` (including Tensor). + comparison_target: The target value of comparison.
+ """ + a = self._GetNdArray(a) + self.assertGreater(np.min(a), comparison_target) + + def assertAllLess(self, a, comparison_target): + """Assert element values are all less than a target value. + + Args: + a: The numpy `ndarray`, or anything that can be converted into a + numpy `ndarray` (including Tensor). + comparison_target: The target value of comparison. + """ + a = self._GetNdArray(a) + self.assertLess(np.max(a), comparison_target) + + def assertAllGreaterEqual(self, a, comparison_target): + """Assert element values are all greater than or equal to a target value. + + Args: + a: The numpy `ndarray`, or anything that can be converted into a + numpy `ndarray` (including Tensor). + comparison_target: The target value of comparison. + """ + a = self._GetNdArray(a) + self.assertGreaterEqual(np.min(a), comparison_target) + + def assertAllLessEqual(self, a, comparison_target): + """Assert element values are all less than or equal to a target value. + + Args: + a: The numpy `ndarray`, or anything that can be converted into a + numpy `ndarray` (including Tensor). + comparison_target: The target value of comparison. + """ + a = self._GetNdArray(a) + self.assertLessEqual(np.max(a), comparison_target) + + def _format_subscripts(self, subscripts, value, limit=10, indent=2): + """Generate a summary of ndarray subscripts as a list of str. + + If limit == N, this method will print up to the first N subscripts on + separate + lines. A line of ellipses (...) will be appended at the end if the number of + subscripts exceeds N. + + Args: + subscripts: The tensor (np.ndarray) subscripts, of the same format as + np.where()'s return value, i.e., a tuple of arrays with each array + corresponding to a dimension. E.g., (array([1, 1]), array([0, 1])). + value: (np.ndarray) value of the tensor. + limit: (int) The maximum number of indices to print. + indent: (int) Number of characters to indent at the beginning of each + line.
+ + Returns: + (list of str) the multi-line representation of the subscripts and values, + potentially with omission at the end. + """ + lines = [] + subscripts = np.transpose(subscripts) + prefix = " " * indent + for subscript in itertools.islice(subscripts, limit): + lines.append(prefix + str(subscript) + " : " + + str(value[tuple(subscript)])) + if len(subscripts) > limit: + lines.append(prefix + "...") + return lines + + def assertAllInRange(self, + target, + lower_bound, + upper_bound, + open_lower_bound=False, + open_upper_bound=False): + """Assert that elements in a Tensor are all in a given range. + + Args: + target: The numpy `ndarray`, or anything that can be converted into a + numpy `ndarray` (including Tensor). + lower_bound: lower bound of the range + upper_bound: upper bound of the range + open_lower_bound: (`bool`) whether the lower bound is open (i.e., > rather + than the default >=) + open_upper_bound: (`bool`) whether the upper bound is open (i.e., < rather + than the default <=) + + Raises: + AssertionError: + if the value tensor does not have an ordered numeric type (float* or + int*), or + if there are nan values, or + if any of the elements do not fall in the specified range. + """ + target = self._GetNdArray(target) + if not (np.issubdtype(target.dtype, np.floating) or + np.issubdtype(target.dtype, np.integer)): + raise AssertionError( + "The value of %s does not have an ordered numeric type, instead it " + "has type: %s" % (target, target.dtype)) + + nan_subscripts = np.where(np.isnan(target)) + if np.size(nan_subscripts): + raise AssertionError( + "%d of the %d element(s) are NaN.
" + "Subscripts(s) and value(s) of the NaN element(s):\n" % + (len(nan_subscripts[0]), np.size(target)) + + "\n".join(self._format_subscripts(nan_subscripts, target))) + + range_str = (("(" if open_lower_bound else "[") + str(lower_bound) + ", " + + str(upper_bound) + (")" if open_upper_bound else "]")) + + violations = ( + np.less_equal(target, lower_bound) + if open_lower_bound else np.less(target, lower_bound)) + violations = np.logical_or( + violations, + np.greater_equal(target, upper_bound) + if open_upper_bound else np.greater(target, upper_bound)) + violation_subscripts = np.where(violations) + if np.size(violation_subscripts): + raise AssertionError( + "%d of the %d element(s) are outside the range %s. " % + (len(violation_subscripts[0]), np.size(target), range_str) + + "Subscript(s) and value(s) of the offending elements:\n" + + "\n".join(self._format_subscripts(violation_subscripts, target))) + + def assertAllInSet(self, target, expected_set): + """Assert that elements of a Tensor are all in a given closed set. + + Args: + target: The numpy `ndarray`, or anything that can be converted into a + numpy `ndarray` (including Tensor). + expected_set: (`list`, `tuple` or `set`) The closed set that the elements + of the value of `target` are expected to fall into. + + Raises: + AssertionError: + if any of the elements do not fall into `expected_set`. + """ + target = self._GetNdArray(target) + + # Elements in target that are not in expected_set. + diff = np.setdiff1d(target.flatten(), list(expected_set)) + if np.size(diff): + raise AssertionError("%d unique element(s) are not in the set %s: %s" % + (np.size(diff), expected_set, diff)) + + def assertDTypeEqual(self, target, expected_dtype): + """Assert ndarray data type is equal to expected. + + Args: + target: The numpy `ndarray`, or anything that can be converted into a + numpy `ndarray` (including Tensor). + expected_dtype: Expected data type. 
+ """ + target = self._GetNdArray(target) + if not isinstance(target, list): + arrays = [target] + for arr in arrays: + self.assertEqual(arr.dtype, expected_dtype) + # pylint: disable=g-doc-return-or-yield @contextlib.contextmanager def assertRaisesWithPredicateMatch(self, exception_type, diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py index 02ffa93bae..8d492256aa 100644 --- a/tensorflow/python/framework/test_util_test.py +++ b/tensorflow/python/framework/test_util_test.py @@ -31,13 +31,16 @@ from tensorflow.core.framework import graph_pb2 from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.python.eager import context from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_ops # pylint: disable=unused-import from tensorflow.python.framework import test_util from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -209,6 +212,21 @@ class TestUtilTest(test_util.TensorFlowTestCase): self._WeMustGoDeeper("name") self._WeMustGoDeeper("orig") + def testAllCloseTensors(self): + a_raw_data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] + a = constant_op.constant(a_raw_data) + b = math_ops.add(1, constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8]])) + self.assertAllClose(a, b) + self.assertAllClose(a, a_raw_data) + + a_dict = {"key": a} + b_dict = {"key": b} + self.assertAllClose(a_dict, b_dict) + + x_list = [a, b] + y_list = [a_raw_data, b] + self.assertAllClose(x_list, y_list) + def testAllCloseScalars(self): self.assertAllClose(7, 7 + 1e-8) with self.assertRaisesRegexp(AssertionError, r"Not equal 
to tolerance"): @@ -317,6 +335,12 @@ class TestUtilTest(test_util.TensorFlowTestCase): rtol=1e-8, atol=1e-8 ) + self.assertAllCloseAccordingToType( + constant_op.constant([1e-8], dtype=dtypes.float64), + constant_op.constant([2e-8], dtype=dtypes.float64), + rtol=1e-8, + atol=1e-8) + with (self.assertRaises(AssertionError)): self.assertAllCloseAccordingToType( np.asarray([1e-7], dtype=np.float64), @@ -332,6 +356,14 @@ class TestUtilTest(test_util.TensorFlowTestCase): float_rtol=1e-7, float_atol=1e-7 ) + self.assertAllCloseAccordingToType( + constant_op.constant([1e-7], dtype=dtypes.float32), + constant_op.constant([2e-7], dtype=dtypes.float32), + rtol=1e-8, + atol=1e-8, + float_rtol=1e-7, + float_atol=1e-7) + with (self.assertRaises(AssertionError)): self.assertAllCloseAccordingToType( np.asarray([1e-6], dtype=np.float32), @@ -349,6 +381,16 @@ class TestUtilTest(test_util.TensorFlowTestCase): half_rtol=1e-4, half_atol=1e-4 ) + self.assertAllCloseAccordingToType( + constant_op.constant([1e-4], dtype=dtypes.float16), + constant_op.constant([2e-4], dtype=dtypes.float16), + rtol=1e-8, + atol=1e-8, + float_rtol=1e-7, + float_atol=1e-7, + half_rtol=1e-4, + half_atol=1e-4) + with (self.assertRaises(AssertionError)): self.assertAllCloseAccordingToType( np.asarray([1e-3], dtype=np.float16), @@ -358,6 +400,157 @@ class TestUtilTest(test_util.TensorFlowTestCase): half_rtol=1e-4, half_atol=1e-4 ) + def testAssertAllEqual(self): + i = variables.Variable([100] * 3, dtype=dtypes.int32, name="i") + j = constant_op.constant([20] * 3, dtype=dtypes.int32, name="j") + k = math_ops.add(i, j, name="k") + + self.evaluate(variables.global_variables_initializer()) + self.assertAllEqual([120] * 3, k) + self.assertAllEqual([20] * 3, j) + + def testAssertNotAllClose(self): + # Test with arrays + self.assertNotAllClose([0.1], [0.2]) + with self.assertRaises(AssertionError): + self.assertNotAllClose([-1.0, 2.0], [-1.0, 2.0]) + + # Test with tensors + x = constant_op.constant([1.0, 1.0], 
name="x") + y = math_ops.add(x, x) + + self.assertAllClose([2.0, 2.0], y) + self.assertNotAllClose([0.9, 1.0], x) + + with self.assertRaises(AssertionError): + self.assertNotAllClose([1.0, 1.0], x) + + def testAssertNotAllCloseRTol(self): + # Test with arrays + with self.assertRaises(AssertionError): + self.assertNotAllClose([1.1, 2.1], [1.0, 2.0], rtol=0.2) + + # Test with tensors + x = constant_op.constant([1.0, 1.0], name="x") + y = math_ops.add(x, x) + + self.assertAllClose([2.0, 2.0], y) + + with self.assertRaises(AssertionError): + self.assertNotAllClose([0.9, 1.0], x, rtol=0.2) + + def testAssertNotAllCloseATol(self): + # Test with arrays + with self.assertRaises(AssertionError): + self.assertNotAllClose([1.1, 2.1], [1.0, 2.0], atol=0.2) + + # Test with tensors + x = constant_op.constant([1.0, 1.0], name="x") + y = math_ops.add(x, x) + + self.assertAllClose([2.0, 2.0], y) + + with self.assertRaises(AssertionError): + self.assertNotAllClose([0.9, 1.0], x, atol=0.2) + + def testAssertAllGreaterLess(self): + x = constant_op.constant([100.0, 110.0, 120.0], dtype=dtypes.float32) + y = constant_op.constant([10.0] * 3, dtype=dtypes.float32) + z = math_ops.add(x, y) + + self.assertAllClose([110.0, 120.0, 130.0], z) + + self.assertAllGreater(x, 95.0) + self.assertAllLess(x, 125.0) + + with self.assertRaises(AssertionError): + self.assertAllGreater(x, 105.0) + with self.assertRaises(AssertionError): + self.assertAllGreater(x, 125.0) + + with self.assertRaises(AssertionError): + self.assertAllLess(x, 115.0) + with self.assertRaises(AssertionError): + self.assertAllLess(x, 95.0) + + def testAssertAllGreaterLessEqual(self): + x = constant_op.constant([100.0, 110.0, 120.0], dtype=dtypes.float32) + y = constant_op.constant([10.0] * 3, dtype=dtypes.float32) + z = math_ops.add(x, y) + + self.assertAllEqual([110.0, 120.0, 130.0], z) + + self.assertAllGreaterEqual(x, 95.0) + self.assertAllLessEqual(x, 125.0) + + with self.assertRaises(AssertionError): + 
self.assertAllGreaterEqual(x, 105.0) + with self.assertRaises(AssertionError): + self.assertAllGreaterEqual(x, 125.0) + + with self.assertRaises(AssertionError): + self.assertAllLessEqual(x, 115.0) + with self.assertRaises(AssertionError): + self.assertAllLessEqual(x, 95.0) + + def testAssertAllInRangeWithNonNumericValuesFails(self): + s1 = constant_op.constant("Hello, ", name="s1") + c = constant_op.constant([1 + 2j, -3 + 5j], name="c") + b = constant_op.constant([False, True], name="b") + + with self.assertRaises(AssertionError): + self.assertAllInRange(s1, 0.0, 1.0) + with self.assertRaises(AssertionError): + self.assertAllInRange(c, 0.0, 1.0) + with self.assertRaises(AssertionError): + self.assertAllInRange(b, 0, 1) + + def testAssertAllInRange(self): + x = constant_op.constant([10.0, 15.0], name="x") + self.assertAllInRange(x, 10, 15) + + with self.assertRaises(AssertionError): + self.assertAllInRange(x, 10, 15, open_lower_bound=True) + with self.assertRaises(AssertionError): + self.assertAllInRange(x, 10, 15, open_upper_bound=True) + with self.assertRaises(AssertionError): + self.assertAllInRange( + x, 10, 15, open_lower_bound=True, open_upper_bound=True) + + def testAssertAllInRangeErrorMessageEllipses(self): + x_init = np.array([[10.0, 15.0]] * 12) + x = constant_op.constant(x_init, name="x") + with self.assertRaises(AssertionError): + self.assertAllInRange(x, 5, 10) + + def testAssertAllInRangeDetectsNaNs(self): + x = constant_op.constant( + [[np.nan, 0.0], [np.nan, np.inf], [np.inf, np.nan]], name="x") + with self.assertRaises(AssertionError): + self.assertAllInRange(x, 0.0, 2.0) + + def testAssertAllInRangeWithInfinities(self): + x = constant_op.constant([10.0, np.inf], name="x") + self.assertAllInRange(x, 10, np.inf) + with self.assertRaises(AssertionError): + self.assertAllInRange(x, 10, np.inf, open_upper_bound=True) + + def testAssertAllInSet(self): + b = constant_op.constant([True, False], name="b") + x = constant_op.constant([13, 37], name="x") + + 
self.assertAllInSet(b, [False, True]) + self.assertAllInSet(b, (False, True)) + self.assertAllInSet(b, {False, True}) + self.assertAllInSet(x, [0, 13, 37, 42]) + self.assertAllInSet(x, (0, 13, 37, 42)) + self.assertAllInSet(x, {0, 13, 37, 42}) + + with self.assertRaises(AssertionError): + self.assertAllInSet(b, [False]) + with self.assertRaises(AssertionError): + self.assertAllInSet(x, (42,)) + def testRandomSeed(self): # Call setUp again for WithCApi case (since it makes a new defeault graph # after setup). -- GitLab From 2ca2390277c2a4ea2d92fb72782bf30bfe00f592 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 24 Apr 2018 16:34:01 -0700 Subject: [PATCH 235/434] Fixing the mock import error for devel docker. --- tensorflow/tools/docker/Dockerfile.devel | 1 + tensorflow/tools/docker/Dockerfile.devel-gpu | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 390d7442c3..5c49ac1d8d 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -31,6 +31,7 @@ RUN pip --no-cache-dir install \ ipykernel \ jupyter \ matplotlib \ + mock \ numpy \ scipy \ sklearn \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 293028d229..196227861b 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -40,6 +40,7 @@ RUN pip --no-cache-dir install \ ipykernel \ jupyter \ matplotlib \ + mock \ numpy \ scipy \ sklearn \ -- GitLab From 2495ec22832c846b149c394aece2db19f2813b45 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Tue, 24 Apr 2018 16:52:29 -0700 Subject: [PATCH 236/434] Disable UseTowerEstimatorWithoutReplication.test_train_single_tower. 
PiperOrigin-RevId: 194168031 --- .../estimator/replicate_model_fn_test.py | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index 144b45982c..dd8a3a95f1 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -540,59 +540,6 @@ class ReplicateAcrossASingleDeviceWithoutTowerOptimizer( self.assertEqual(7.0, session.run(c)) -class UseTowerEstimatorWithoutReplication(test_util.TensorFlowTestCase): - - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(10, dtype=dtypes.float64), - dtype=dtypes.float64) - - features = features['features'] - predictions = math_ops.multiply(features, c) - - loss = losses.absolute_difference( - labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) - loss = math_ops.reduce_sum(loss) - - metrics = { - 'accuracy': metrics_lib.accuracy(labels, predictions), - 'auc': metrics_lib.auc(labels, predictions) - } - - optimizer = replicate_model_fn.TowerOptimizer( - gradient_descent.GradientDescentOptimizer(params['learning_rate'])) - - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=loss, - eval_metric_ops=metrics, - predictions={'probabilities': predictions}, - train_op=optimizer.minimize(loss)) - - @property - def params(self): - params = {} - params['learning_rate'] = 1.0 - return params - - def test_train_single_tower(self): - features = np.array([[1.0], [2.0]]) - labels = np.array([[1.0], [2.0]]) - - train_input_fn = numpy_io.numpy_input_fn( - x={'features': features}, y=labels, batch_size=2, shuffle=False) - - with self.test_session(): - estimator = estimator_lib.Estimator( - model_fn=self.model_fn, - model_dir=tempfile.mkdtemp(), - params=self.params) - 
estimator.train(train_input_fn, steps=1) - - self.assertEqual(7.0, estimator.get_variable_value('c')) - - class MakeSureSyncReplicasOptimizerWorks(test_util.TensorFlowTestCase): def model_fn(self, mode, features, labels, params): -- GitLab From 44203871672b85d936797cb60bab6731ad6a2824 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 24 Apr 2018 23:58:22 +0000 Subject: [PATCH 237/434] Enable int8 support for FloorDiv int8 is enabled for FloorDiv in math_ops.cc though the kernel was not registered. This fix register the int8 kernel for FloorDiv, and enables the test case for it. Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_op_floor_div.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc index fecbf85989..24da61fdf6 100644 --- a/tensorflow/core/kernels/cwise_op_floor_div.cc +++ b/tensorflow/core/kernels/cwise_op_floor_div.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16, - int16, int32, int64); +REGISTER6(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16, + int8, int16, int32, int64); REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float, Eigen::half, double); -- GitLab From 552783ec41b9cd7fa678ebc6dd1c8371c69f8974 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Apr 2018 00:00:45 +0000 Subject: [PATCH 238/434] Add np.int8, np.int16 test cases for div tests Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/division_past_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/division_past_test.py b/tensorflow/python/kernel_tests/division_past_test.py index 2ff2f89407..e5c86719d3 100644 --- a/tensorflow/python/kernel_tests/division_past_test.py +++ b/tensorflow/python/kernel_tests/division_past_test.py @@ -36,7 +36,7 @@ class DivisionTestCase(test.TestCase): values = [1, 2, 7, 11] functions = (lambda x: x), constant_op.constant # TODO(irving): Test int8, int16 once we support casts for those. - dtypes = np.int32, np.int64, np.float32, np.float64 + dtypes = np.int8, np.int16, np.int32, np.int64, np.float32, np.float64 tensors = [] checks = [] -- GitLab From d42d3640a48a6eecf2696d1cfe247de8f571dccb Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Apr 2018 00:01:27 +0000 Subject: [PATCH 239/434] Remove TODO as it is done now. 
Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/division_past_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/division_past_test.py b/tensorflow/python/kernel_tests/division_past_test.py index e5c86719d3..9ddd62e63c 100644 --- a/tensorflow/python/kernel_tests/division_past_test.py +++ b/tensorflow/python/kernel_tests/division_past_test.py @@ -35,7 +35,6 @@ class DivisionTestCase(test.TestCase): """Test all the different ways to divide.""" values = [1, 2, 7, 11] functions = (lambda x: x), constant_op.constant - # TODO(irving): Test int8, int16 once we support casts for those. dtypes = np.int8, np.int16, np.int32, np.int64, np.float32, np.float64 tensors = [] -- GitLab From e871ea871fc39521dfa3c9f659b1d576c835c1e9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 24 Apr 2018 17:02:46 -0700 Subject: [PATCH 240/434] Fixed typo in an error message. PiperOrigin-RevId: 194169339 --- tensorflow/core/kernels/string_split_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/string_split_op.cc b/tensorflow/core/kernels/string_split_op.cc index 9efbd66ef7..4c2b312c34 100644 --- a/tensorflow/core/kernels/string_split_op.cc +++ b/tensorflow/core/kernels/string_split_op.cc @@ -71,7 +71,7 @@ class StringSplitOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->input("delimiter", &delimiter_tensor)); OP_REQUIRES( ctx, TensorShapeUtils::IsScalar(delimiter_tensor->shape()), - errors::InvalidArgument("delimiter must scalar, got shape: ", + errors::InvalidArgument("delimiter must be a scalar, got shape: ", delimiter_tensor->shape().DebugString())); const auto delimiter_vec = delimiter_tensor->flat(); const string& delimiter = delimiter_vec(0); -- GitLab From 8b3c5e62be825d78bc25b3c4b6c65a44d47416e0 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Tue, 24 Apr 2018 17:35:08 -0700 Subject: [PATCH 241/434] `PartitionedCallOp`: An op for executing multi-device functions. 
A `PartitionedCallOp` allows for execution of functions across multiple devices but within a single process. It proceeds by placing and partitioning the graph underlying a given function body, instantiating for each partitioned subgraph a function. The yielded function shards, which together are equivalent to the original function, are then executed. `PartitionedCallOp` is not part of the public TensorFlow API. PiperOrigin-RevId: 194173114 --- tensorflow/compiler/jit/BUILD | 37 --- .../jit/encapsulate_subgraphs_pass.cc | 2 +- .../jit/encapsulate_subgraphs_pass_test.cc | 2 +- tensorflow/compiler/tf2xla/BUILD | 1 - .../tf2xla/functionalize_control_flow.cc | 2 +- tensorflow/core/BUILD | 5 + .../base_api/api_def_PartitionedCall.pbtxt | 23 ++ .../python_api/api_def_PartitionedCall.pbtxt | 1 + .../framework}/graph_to_functiondef.cc | 4 +- .../framework}/graph_to_functiondef.h | 9 +- .../framework}/graph_to_functiondef_test.cc | 2 +- tensorflow/core/kernels/BUILD | 12 + .../core/kernels/partitioned_function_ops.cc | 279 ++++++++++++++++++ tensorflow/core/ops/functional_ops.cc | 9 + tensorflow/python/kernel_tests/BUILD | 1 + .../kernel_tests/functional_ops_test.py | 106 +++++++ tensorflow/python/ops/functional_ops.py | 5 +- 17 files changed, 450 insertions(+), 50 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_PartitionedCall.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PartitionedCall.pbtxt rename tensorflow/{compiler/jit => core/framework}/graph_to_functiondef.cc (98%) rename tensorflow/{compiler/jit => core/framework}/graph_to_functiondef.h (79%) rename tensorflow/{compiler/jit => core/framework}/graph_to_functiondef_test.cc (98%) create mode 100644 tensorflow/core/kernels/partitioned_function_ops.cc diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 53b124cf89..af2965bba5 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -257,19 +257,6 @@ cc_library( alwayslink 
= 1, ) -cc_library( - name = "graph_to_functiondef", - srcs = ["graph_to_functiondef.cc"], - hdrs = ["graph_to_functiondef.h"], - visibility = [":friends"], - deps = [ - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - ], -) - cc_library( name = "create_xla_launch_op", srcs = [ @@ -300,7 +287,6 @@ cc_library( ], deps = [ ":common", - ":graph_to_functiondef", ":shape_inference_helpers", ":union_find", "//tensorflow/compiler/jit/graphcycles", @@ -347,28 +333,6 @@ tf_cc_test( ], ) -tf_cc_test( - name = "graph_to_functiondef_test", - size = "small", - srcs = [ - "graph_to_functiondef_test.cc", - ], - deps = [ - ":graph_to_functiondef", - "//tensorflow/cc:cc_ops", - "//tensorflow/cc:cc_ops_internal", - "//tensorflow/cc:function_ops", - "//tensorflow/cc:ops", - "//tensorflow/compiler/tf2xla:xla_compiler", - "//tensorflow/compiler/tf2xla/kernels:xla_ops", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework_internal", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - tf_cc_test( name = "compilation_passes_test", size = "small", @@ -379,7 +343,6 @@ tf_cc_test( deps = [ ":common", ":compilation_passes", - ":graph_to_functiondef", "//tensorflow/cc:cc_ops", "//tensorflow/cc:cc_ops_internal", "//tensorflow/cc:function_ops", diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index 7507e193b5..f06debaf31 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -22,7 +22,6 @@ limitations under the License. 
#include #include -#include "tensorflow/compiler/jit/graph_to_functiondef.h" #include "tensorflow/compiler/jit/graphcycles/graphcycles.h" #include "tensorflow/compiler/jit/legacy_flags/encapsulate_subgraphs_pass_flags.h" #include "tensorflow/compiler/jit/mark_for_compilation_pass.h" @@ -35,6 +34,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/shape_refiner.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph_def_util.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/graph/algorithm.h" diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc index 3502d1bb45..5ec24d39a2 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc @@ -20,8 +20,8 @@ limitations under the License. 
#include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/compiler/jit/graph_to_functiondef.h" #include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/graph_def_builder.h" #include "tensorflow/core/lib/core/status_test_util.h" diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index ba5c3a1484..942504e6bd 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -412,7 +412,6 @@ cc_library( hdrs = ["functionalize_control_flow.h"], deps = [ ":tf2xla_util", - "//tensorflow/compiler/jit:graph_to_functiondef", "//tensorflow/compiler/jit:union_find", "//tensorflow/compiler/tf2xla:dump_graph", "//tensorflow/compiler/tf2xla/ops:xla_ops", diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc index 23629d85ae..8d1f268490 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc @@ -21,13 +21,13 @@ limitations under the License. 
#include #include -#include "tensorflow/compiler/jit/graph_to_functiondef.h" #include "tensorflow/compiler/jit/union_find.h" #include "tensorflow/compiler/tf2xla/dump_graph.h" #include "tensorflow/compiler/tf2xla/tf2xla_util.h" #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/control_flow.h" diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index bda87c6aed..e8f10f148d 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -545,6 +545,7 @@ tf_cuda_library( "framework/device_base.h", "framework/function.h", "framework/graph_def_util.h", + "framework/graph_to_functiondef.h", "framework/kernel_def_builder.h", "framework/log_memory.h", "framework/lookup_interface.h", @@ -999,6 +1000,7 @@ cc_library( "//tensorflow/core/kernels:nn", "//tensorflow/core/kernels:parameterized_truncated_normal_op", "//tensorflow/core/kernels:parsing", + "//tensorflow/core/kernels:partitioned_function_ops", "//tensorflow/core/kernels:random_ops", "//tensorflow/core/kernels:random_poisson_op", "//tensorflow/core/kernels:remote_fused_graph_ops", @@ -3061,6 +3063,7 @@ tf_cc_tests( "framework/common_shape_fns_test.cc", "framework/function_test.cc", "framework/graph_def_util_test.cc", + "framework/graph_to_functiondef_test.cc", "framework/kernel_def_builder_test.cc", "framework/memory_types_test.cc", "framework/node_def_builder_test.cc", @@ -3139,6 +3142,8 @@ tf_cc_tests( ":testlib", "//tensorflow/cc:cc_ops", "//tensorflow/cc:cc_ops_internal", + "//tensorflow/cc:function_ops", + "//tensorflow/cc:ops", "//tensorflow/cc:scope", "//tensorflow/cc:sendrecv_ops", "//tensorflow/cc:while_loop", diff --git a/tensorflow/core/api_def/base_api/api_def_PartitionedCall.pbtxt 
b/tensorflow/core/api_def/base_api/api_def_PartitionedCall.pbtxt new file mode 100644 index 0000000000..caf8172a52 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_PartitionedCall.pbtxt @@ -0,0 +1,23 @@ +op { + graph_op_name: "PartitionedCall" + in_arg { + name: "args" + description: "A list of input tensors." + } + out_arg { + name: "output" + description: "A list of return values." + } + attr { name: "Tin" description: "A list of input types." } + attr { name: "Tout" description: "A list of output types." } + attr { + name: "f" + description: <